How embryos acclimate to temperature through epigenetic regulation

Author

Thomas O’Leary, Emily Mikucki, Sumaetee Tangwancharoen, Sara Helms Cahan, Seth Frietze, Brent L. Lockwood

Code

cs_pheno.R
Code
# ------------------------------------------------------------------------------
# Canton S acclimation phenotype figure
# March 31, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Description -----
# Canton S phenotype figure

# Load libraries
library(tidyverse)

# Load data
dat <- read_csv(here::here("data/raw/pheno/embryo_acc_pheno.csv"))

# Filter only Canton S data
dat <- dat |>
  filter(Genotype == "CantonS") |>
  filter(Stage == "Early")

# Count the number of eggs per acclimation treatment
dat |>
  group_by(Acclimation) |>
  summarise(total_eggs = sum(Number_Eggs))

# Acclimation effect
dat |>
  glm(Survival ~ Acclimation, 
      data = .,
      weights = Number_Eggs,
      family = binomial(link = "logit"))
00_mkref.sh
Code
#!/bin/bash

# Make a reference package for D. melanogaster ---------------------------------

# Directions for running this script -------------------------------------------
# 0. Be sure the following things have happened before beginning
#   - Download FASTA and GTF files for D. melanogaster from Ensembl
#   - Create a contig file according to 10X Genomics instructions
# 1. In the command line, run the following command: bash path/to/this/file.sh

# Set up directories and variables ---------------------------------------------

# Move to the directory where you want the output files to be saved
cd /netfiles02/lockwood_lab/heater/data/processed/seq/ref/

# Point towards installed cellranger-arc program
export PATH=/netfiles02/lockwood_lab/cellranger-arc/cellranger-arc-2.0.2:$PATH

# # Set variables for the script
# ref_path="/netfiles02/lockwood_lab/heater/data/processed/seq/ref/"
# libraries_path="/netfiles02/lockwood_lab/heater/data/raw/seq/libraries/"

# Create a raw unfiltered reference package ------------------------------------
cellranger-arc mkref --config=BDGP6.32.config

# Filter the GTF to restrict it to protein-coding, lncRNA, antisense, and 
#   immune-related genes as 10X Genomics has done for their human and mouse refs
cellranger-arc mkgtf Drosophila_melanogaster.BDGP6.32.109.gtf \
  Drosophila_melanogaster.BDGP6.32.109_filtered.gtf \
  --attribute=gene_biotype:protein_coding \
  --attribute=gene_biotype:lncRNA \
  --attribute=gene_biotype:antisense \
  --attribute=gene_biotype:IG_LV_gene \
  --attribute=gene_biotype:IG_V_gene \
  --attribute=gene_biotype:IG_V_pseudogene \
  --attribute=gene_biotype:IG_D_gene \
  --attribute=gene_biotype:IG_J_gene \
  --attribute=gene_biotype:IG_J_pseudogene \
  --attribute=gene_biotype:IG_C_gene \
  --attribute=gene_biotype:IG_C_pseudogene \
  --attribute=gene_biotype:TR_V_gene \
  --attribute=gene_biotype:TR_V_pseudogene \
  --attribute=gene_biotype:TR_D_gene \
  --attribute=gene_biotype:TR_J_gene \
  --attribute=gene_biotype:TR_J_pseudogene \
  --attribute=gene_biotype:TR_C_gene

# Create a filtered reference package ------------------------------------------
cellranger-arc mkref --config=BDGP6.32.filtered.config 
01_count.sh
Code
#!/bin/bash

# Directions for running this script -------------------------------------------
# 0. Be sure the following things have happened before beginning
#   - Demultiplex all samples
#   - Create reference package
#   - Create a separate libraries csv file pointing to demuxed fastq files
#   - Set up slurm.template file according to 10X Genomics instructions in 
#      cellranger-arc-2.0.2/external/martian/jobmanagers
# 1. In the command line, run the following command: sbatch path/to/this/file.sh

# Set up directories and variables ---------------------------------------------

# Move to the directory where you want the output files to be saved
cd /netfiles02/lockwood_lab/heater/data/processed/seq/samples/

# Point towards installed cellranger-arc program
export PATH=/netfiles02/lockwood_lab/cellranger-arc/cellranger-arc-2.0.2:$PATH

# Set variables for the script
ref_path="/netfiles02/lockwood_lab/heater/data/raw/seq/ref/BDGP6_32_filtered"
libraries_path="/netfiles02/lockwood_lab/heater/data/raw/seq/libraries/"
n_cores=32
mem=64

# Run cellranger-arc count for each sample -------------------------------------

# Sample 18°C Rep 1
cellranger-arc count --id=18C_Rep1 \
  --reference=${ref_path} \
  --libraries=${libraries_path}/18C_Rep1.csv \
  --localcores=${n_cores} \
  --localmem=${mem} \
  --jobmode=slurm
  
# Sample 18°C Rep 2
cellranger-arc count --id=18C_Rep2 \
  --reference=${ref_path} \
  --libraries=${libraries_path}/18C_Rep2.csv \
  --localcores=${n_cores} \
  --localmem=${mem} \
  --jobmode=slurm
 
# Sample 25°C Rep 1
cellranger-arc count --id=25C_Rep1 \
  --reference=${ref_path} \
  --libraries=${libraries_path}/25C_Rep1.csv \
  --localcores=${n_cores} \
  --localmem=${mem} \
  --jobmode=slurm
  
# Sample 25°C Rep 2
cellranger-arc count --id=25C_Rep2 \
  --reference=${ref_path} \
  --libraries=${libraries_path}/25C_Rep2.csv \
  --localcores=${n_cores} \
  --localmem=${mem} \
  --jobmode=slurm
02_aggr.sh
Code
#!/bin/bash

# Aggregate together all samples -----------------------------------------------

# Directions for running this script -------------------------------------------
# 0. Before running run cellranger-arc count for all samples and create 
#      aggr_libraries.csv file pointing to atac_fragments.tsv.gz and 
#      gex_molecule_info.h5 files per 10X Genomics instructions.
# 1. In the command line, run the following command: sbatch path/to/this/file.sh

# Request cluster resources ----------------------------------------------------

# Specify partition
#SBATCH --partition=bluemoon

# Request nodes
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12

# Request memory for the entire job -- you can request --mem OR --mem-per-cpu
#SBATCH --mem=31000

# Reserve walltime -- hh:mm:ss -- 30 hrs max below
#SBATCH --time=30:00:00

# Name this job
#SBATCH --job-name=aggr_all

# Name output of this job using %x=job-name and %j=job-id
#SBATCH --output=logs/%x_%j.out 

# Echo some useful and interesting information 
echo "Starting sbatch script at: `date`"
echo "  running host:    ${SLURMD_NODENAME}"
echo "  assigned nodes:  ${SLURM_JOB_NODELIST}"
echo "  partition used:  ${SLURM_JOB_PARTITION}"
echo "  jobid:           ${SLURM_JOBID}"

# Set up directories and variables ---------------------------------------------

# Move to the directory where files will be aggregated
cd /netfiles02/lockwood_lab/heater/data/processed/seq/

# Point towards installed cellranger-arc program
export PATH=/netfiles02/lockwood_lab/cellranger-arc/cellranger-arc-2.0.2:$PATH

# Set variables for the script
ref_path="/netfiles02/lockwood_lab/heater/data/raw/seq/ref/BDGP6_32_filtered"
libraries_path="/netfiles02/lockwood_lab/heater/data/processed/seq/samples/aggr_libraries.csv"

# Run cellranger-arc aggr to aggregate all samples together --------------------
cellranger-arc aggr --id=all \
  --csv=${libraries_path} \
  --reference=${ref_path} \
  --normalize=depth
00_create_seurat_object.R
Code
# ------------------------------------------------------------------------------
# Load data and set up Seurat object
# May 11, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Description 
# Load in the RNA-Seq count data and the ATAC-Seq peak fragment data and save as
# an rds file to be used for quality control and filtering.

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)
library(AnnotationHub)

# Load the RNA-Seq and ATAC-Seq data -------------------------------------------
data_dir <- here::here("data/processed/seq/all/outs/")
raw_feature_dir <- paste0(data_dir, "raw_feature_bc_matrix")
frag_path <- paste0(data_dir, "atac_fragments.tsv.gz")
counts <- Read10X(raw_feature_dir)

# Get gene annotations for dm6 -------------------------------------------------
BDGP6.32 <- query(AnnotationHub(), 
                  c("EnsDb", "Drosophila melanogaster", "109"))[[1]]
annotation <- GetGRangesFromEnsDb(BDGP6.32)

# Meta data --------------------------------------------------------------------
# Create meta data for all cells based on the GEM well suffix. From 10X Genomics
# "This number, which indicates which GEM well the barcode sequence came from, 
# is called the GEM well suffix. The numbering of the GEM wells will reflect the 
# order that the GEM wells were provided in the Aggregation CSV."
meta_data <- tibble(cellnames = colnames(counts[[1]])) |> 
  mutate(orig.ident = str_extract(cellnames, "[1-4]")) |> 
  full_join(tibble::tribble(
    ~orig.ident, ~sample_name, ~acc_temp,
    "1", "18C_Rep1", "18°C",
    "2", "18C_Rep2", "18°C",
    "3", "25C_Rep1", "25°C",
    "4", "25C_Rep2", "25°C"
  ), 
  by = "orig.ident") |> 
  column_to_rownames("cellnames")

# Create a Seurat object containing the RNA data
dat <- CreateSeuratObject(
  project = "heater",
  counts = counts$`Gene Expression`,
  assay = "RNA",
  names.delim = "-",
  meta.data = meta_data
)

# Create ATAC assay and add it to the object
dat[["ATAC"]] <- CreateChromatinAssay(
  counts = counts$Peaks,
  sep = c(":", "-"),
  fragments = frag_path,
  annotation = annotation,
  validate.fragments = FALSE
)

# Save raw Seurat object
saveRDS(dat, here::here("data/processed/seurat_object/00_dat_raw.rds"))

# Create Seurat object from the filtered matrix from 10X Cell Ranger ARC -------

# Load filtered data
filtered_feature_dir <- paste0(data_dir, "filtered_feature_bc_matrix")
counts <- Read10X(filtered_feature_dir)

# Meta data for counts from filtered counts
meta_data <- tibble(cellnames = colnames(counts[[1]])) |>
  mutate(orig.ident = str_extract(cellnames, "[1-4]")) |>
  full_join(tibble::tribble(
    ~orig.ident, ~sample_name, ~acc_temp,
    "1", "18C_Rep1", "18°C",
    "2", "18C_Rep2", "18°C",
    "3", "25C_Rep1", "25°C",
    "4", "25C_Rep2", "25°C"
  ), 
  by = "orig.ident") |>
  column_to_rownames("cellnames")

# Create a Seurat object containing the RNA data
dat <- CreateSeuratObject(
  project = "heater",
  counts = counts$`Gene Expression`,
  assay = "RNA",
  names.delim = "-",
  meta.data = meta_data
)

# Create ATAC assay and add it to the object
dat[["ATAC"]] <- CreateChromatinAssay(
  counts = counts$Peaks,
  sep = c(":", "-"),
  fragments = frag_path,
  annotation = annotation
)

# Save 10X Cell Ranger ARC filtered Seurat object
saveRDS(dat, here::here("data/processed/seurat_object/00_dat_10x_cells.rds"))
01_quality_control_filtering.R
Code
# ------------------------------------------------------------------------------
# Quality control and filtering of low-quality cells
# March 27, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Description -----
# Basic quality control metrics and filtering out non-cell barcodes from the 10X 
# Cell Ranger ARC called cells.

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)

# Load 10X filtered data
dat <- readRDS(
  here::here("data/processed/seurat_object/00_dat_10x_cells.rds")
)

# Calculate Nucleosome Signal and TSS Enrichment for ATAC QC metrics
DefaultAssay(dat) <- "ATAC"
dat <- NucleosomeSignal(dat)
dat <- TSSEnrichment(dat)

# Calculate the percentage of RNA reads that map to mitochondrial genes 
DefaultAssay(dat) <- "RNA"
dat[["percent.mt"]] <- PercentageFeatureSet(
  dat,
  pattern = "^mt:"
)
# Calculate the percentage of RNA reads that map to ribosomal genes
dat[["percent.ribo"]] <- PercentageFeatureSet(
  data, 
  pattern = "^Rp[S|L]"
)

# Save object with Nucleosome, TSS, and mitochondria meta.data added 
saveRDS(dat, "data/processed/seurat_object/01_dat_10x_cells.rds")

# Filter out barcodes with low counts or high mitochondrial content ------------

# Define filtering thresholds
low_ATAC <- 800
low_RNA <- 200
max_mt <- 5 #### Calderon did 25 % for both mt and rb -- ask Seth? Maybe
#max_rb <- 5

# Subset 10x data based on thresholds
dat <- dat |>
  subset(
    nCount_RNA >= low_RNA &
      nCount_ATAC >= low_ATAC & 
      percent.mt < max_mt # &
      # percent.ribo < max_rb
    )

# Save object
saveRDS(dat, "data/processed/seurat_object/01_dat_10x_filtered.rds")

# Create a filtered fragment file to be used in the amulet doublet finder ------

# Path to fragment file with all barcodes included
data_dir <- here::here("data/processed/seq/all/outs/")
frag_path <- paste0(data_dir, "atac_fragments.tsv.gz")

# Write the file with fragments from only the filtered barcodes
FilterCells(frag_path,
            Cells(dat),
            outfile = paste0(data_dir, "atac_fragments_cells.tsv.gz"))
02_initial_cluster.R
Code
# ------------------------------------------------------------------------------
# Dimension reduction and clustering
# May 11, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)
library(clustree)

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/01_dat_10x_filtered.rds")
)

# Run the standard workflow for visualization and clustering -------------------
# This initial clustering is just for the multiplets plots

# Set RNA to the default assay set for the following set of operations 
DefaultAssay(dat) <- "RNA"

# Log-Normalize data
dat <- NormalizeData(dat)

# Find variable features
dat <- FindVariableFeatures(dat)

# Scale and center data
dat <- ScaleData(dat)

# Run principle component analysis 
dat <- RunPCA(dat)

# Run UMAP
dat <- RunUMAP(dat, dims = 1:30)

# Construct nearest-neighbor graph
dat <- FindNeighbors(dat, dims = 1:30)

# Finding the correct resolution of cluster -----
# Run on a bunch of different resolutions
dat <- FindClusters(dat, resolution = seq(0.01, 0.1, 0.01))
# Look at the cluster tree
clustree::clustree(dat)

# Remove the RNA_snn columns added
dat@meta.data <- dat@meta.data |>
  select(!contains("RNA_snn"))

# Set the final clusters
dat <- FindClusters(dat, resolution = 0.03)

# Save data
saveRDS(
  dat, 
  here::here("data/processed/seurat_object/02_dat_initial_cluster.rds")
)
04_doublet_finder.R
Code
# ------------------------------------------------------------------------------
# Detecting multiplets in the multiome data sets
# DoubleFinder and amulet
# May 22, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)
library(Seurat)
library(DoubletFinder)

# DoubletFinder for RNA doublets -----------------------------------------------

# Define expected doublet rate from 10X estimates
doublet_rate <- 0.008/1000
exp_double_rate <- doublet_rate*4000

# Each sample must be done separately per the instructions of DoubletFinder

# 18C_Rep1 ---------------------------------------------------------------------

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/02_dat_10x_filtered.rds")) |>
  subset(sample_name == "18C_Rep1")

# Pre-process Seurat object ----------------------------------------------------
DefaultAssay(dat) <- "RNA"
dat <- NormalizeData(dat)
dat <- FindVariableFeatures(dat, selection.method = "vst", nfeatures = 2000)
dat <- ScaleData(dat)
dat <- RunPCA(dat)
dat <- RunUMAP(dat, dims = 1:10)
dat <- FindNeighbors(dat, dims = 1:10)
dat <- FindClusters(dat, resolution = 0.1)

# pK Identification ------------------------------------------------------------
sweep.res.list_dat <- paramSweep_v3(dat, PCs = 1:10, sct = FALSE)
sweep.stats_dat <- summarizeSweep(sweep.res.list_dat, GT = FALSE)
bcmvn_dat <- find.pK(sweep.stats_dat)

# Homotypic Doublet Proportion Estimate ----------------------------------------
homotypic.prop <- modelHomotypic(dat@meta.data$seurat_clusters)
nExp_poi <- round(exp_double_rate * nrow(dat@meta.data))
nExp_poi.adj <- round(nExp_poi*(1 - homotypic.prop))

# Run DoubletFinder with varying classification stringencies -------------------
dat <- doubletFinder_v3(dat, 
                        PCs = 1:10,
                        pK = 0.23,
                        nExp = nExp_poi.adj,
                        reuse.pANN = FALSE, 
                        sct = FALSE)

# Save the classified doublets
doublets_18C_Rep1 <- dat@meta.data |>
  subset(DF.classifications_0.25_0.23_61 == "Doublet") |>
  rownames()

# 18C_Rep2 ---------------------------------------------------------------------

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/02_dat_10x_filtered.rds")) |>
  subset(sample_name == "18C_Rep2")

# Pre-process Seurat object ----------------------------------------------------
DefaultAssay(dat) <- "RNA"
dat <- NormalizeData(dat)
dat <- FindVariableFeatures(dat, selection.method = "vst", nfeatures = 2000)
dat <- ScaleData(dat)
dat <- RunPCA(dat)
dat <- RunUMAP(dat, dims = 1:10)
dat <- FindNeighbors(dat, dims = 1:10)
dat <- FindClusters(dat, resolution = 0.1)

# pK Identification ------------------------------------------------------------
sweep.res.list_dat <- paramSweep_v3(dat, PCs = 1:10, sct = FALSE)
sweep.stats_dat <- summarizeSweep(sweep.res.list_dat, GT = FALSE)
bcmvn_dat <- find.pK(sweep.stats_dat)

# Homotypic Doublet Proportion Estimate ----------------------------------------
homotypic.prop <- modelHomotypic(dat@meta.data$seurat_clusters)
nExp_poi <- round(exp_double_rate * nrow(dat@meta.data))
nExp_poi.adj <- round(nExp_poi*(1 - homotypic.prop))

# Run DoubletFinder with varying classification stringencies -------------------
dat <- doubletFinder_v3(dat, 
                        PCs = 1:10,
                        pK = 0.005,
                        nExp = nExp_poi.adj,
                        reuse.pANN = FALSE, 
                        sct = FALSE)

# Save the classified doublets
doublets_18C_Rep2 <- dat@meta.data |>
  subset(DF.classifications_0.25_0.005_103 == "Doublet") |>
  rownames()


# 25C_Rep1 ---------------------------------------------------------------------

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/02_dat_10x_filtered.rds")) |>
  subset(sample_name == "25C_Rep1")

# Pre-process Seurat object ----------------------------------------------------
DefaultAssay(dat) <- "RNA"
dat <- NormalizeData(dat)
dat <- FindVariableFeatures(dat, selection.method = "vst", nfeatures = 2000)
dat <- ScaleData(dat)
dat <- RunPCA(dat)
dat <- RunUMAP(dat, dims = 1:10)
dat <- FindNeighbors(dat, dims = 1:10)
dat <- FindClusters(dat, resolution = 0.1)

# pK Identification ------------------------------------------------------------
sweep.res.list_dat <- paramSweep_v3(dat, PCs = 1:10, sct = FALSE)
sweep.stats_dat <- summarizeSweep(sweep.res.list_dat, GT = FALSE)
bcmvn_dat <- find.pK(sweep.stats_dat)

# Homotypic Doublet Proportion Estimate ----------------------------------------
homotypic.prop <- modelHomotypic(dat@meta.data$seurat_clusters)
nExp_poi <- round(exp_double_rate * nrow(dat@meta.data))
nExp_poi.adj <- round(nExp_poi*(1 - homotypic.prop))

# Run DoubletFinder with varying classification stringencies -------------------
dat <- doubletFinder_v3(dat, 
                        PCs = 1:10,
                        pK = 0.3,
                        nExp = nExp_poi.adj,
                        reuse.pANN = FALSE, 
                        sct = FALSE)

# Save the classified doublets
doublets_25C_Rep1 <- dat@meta.data |>
  subset(DF.classifications_0.25_0.3_63 == "Doublet") |>
  rownames()

# 25C_Rep2 ---------------------------------------------------------------------

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/02_dat_10x_filtered.rds")) |>
  subset(sample_name == "25C_Rep2")

# Pre-process Seurat object ----------------------------------------------------
DefaultAssay(dat) <- "RNA"
dat <- NormalizeData(dat)
dat <- FindVariableFeatures(dat, selection.method = "vst", nfeatures = 2000)
dat <- ScaleData(dat)
dat <- RunPCA(dat)
dat <- RunUMAP(dat, dims = 1:10)
dat <- FindNeighbors(dat, dims = 1:10)
dat <- FindClusters(dat, resolution = 0.1)

# pK Identification ------------------------------------------------------------
sweep.res.list_dat <- paramSweep_v3(dat, PCs = 1:10, sct = FALSE)
sweep.stats_dat <- summarizeSweep(sweep.res.list_dat, GT = FALSE)
bcmvn_dat <- find.pK(sweep.stats_dat)

# Homotypic Doublet Proportion Estimate ----------------------------------------
homotypic.prop <- modelHomotypic(dat@meta.data$seurat_clusters)
nExp_poi <- round(exp_double_rate * nrow(dat@meta.data))
nExp_poi.adj <- round(nExp_poi*(1 - homotypic.prop))

# Run DoubletFinder with varying classification stringencies -------------------
dat <- doubletFinder_v3(dat, 
                        PCs = 1:10,
                        pK = 0.01,
                        nExp = nExp_poi.adj,
                        reuse.pANN = FALSE, 
                        sct = FALSE)

# Save the classified doublets
doublets_25C_Rep2 <- dat@meta.data |>
  subset(DF.classifications_0.25_0.01_72 == "Doublet") |>
  rownames()

doublets <- c(doublets_18C_Rep1,
              doublets_18C_Rep2,
              doublets_25C_Rep1,
              doublets_25C_Rep2)

# Save doublets
saveRDS(doublets, here::here("data/processed/qc/doublet_finder.rds"))
05_rm_multiplets.R
Code
# ------------------------------------------------------------------------------
# Remove multiplets
# May 23, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
require(tidyverse)
require(Seurat)

# Load multiplet barcodes
doublet_amulet <- readRDS(
  here::here("data/processed/qc/doublet_amulet.rds")
  ) |>
  filter(q.value < 0.05) |>
  rownames()
doublet_finder <- readRDS(here::here("data/processed/qc/doublet_finder.rds"))

# Load seurat object
dat <- readRDS(
  here::here("data/processed/seurat_object/02_dat_initial_cluster.rds")
) 

# Mark amulet described multiplet barcodes
dat$doublet_amulet <- ifelse(colnames(dat) %in% doublet_amulet, 
                             "Multiplet",
                             "Singlet")

# Mark amulet described multiplet barcodes
dat$doublet_finder <- ifelse(colnames(dat) %in% doublet_finder,
                             "Multiplet",
                             "Singlet")

# Save dat with multiplets marked
saveRDS(dat, here::here("data/processed/seurat_object/05_dat_multiplets.rds"))

# Remove all multiplets from the data set
dat <- dat |>
  subset(doublet_amulet == "Singlet" & doublet_finder == "Singlet")

# There are still two outlier barcodes with exceptionally high nCount_RNA when
# compared to the remaining high quality nuclei – 12-13k compared to 6-7k for 
# the next highest cells -- so let's remove those two outliers now
dat <- dat |> 
  subset(nCount_RNA < 10000)

# Remove cluster info from that initial clustering
dat@meta.data <- dat@meta.data |>
  select(-seurat_clusters) |>
  select(!contains("snn"))

# Save dat with multiplets removed
saveRDS(dat, here::here("data/processed/seurat_object/05_dat_filtered.rds"))
06_qc_data_cells.R
Code
# ------------------------------------------------------------------------------
# Quality control and filtering of low-quality cells
# May 25, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Description -----
# Quality control metrics of final cells

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)
library(AnnotationHub)

# Load 10X filtered data
dat <- readRDS(
  here::here("data/processed/seurat_object/05_dat_filtered.rds")
)

# Load path to fragment file for later counting
data_dir <- here::here("data/processed/seq/all/outs/")
frag_path <- paste0(data_dir, "atac_fragments.tsv.gz")

# Get gene annotations for dm6 -------------------------------------------------
BDGP6.32 <- query(AnnotationHub(), 
                  c("EnsDb", "Drosophila melanogaster", "109"))[[1]]
annotation <- GetGRangesFromEnsDb(BDGP6.32)
seqlevelsStyle(annotation) <- 'UCSC'

# Calculate additional QC metrics ----------------------------------------------

# Calculate TSS Enrichment matrix for the cells that are left
DefaultAssay(dat) <- "ATAC"
dat <- TSSEnrichment(dat, fast = FALSE)

# Counting fraction of reads in peaks -----

# Count fragments
total_fragments <- CountFragments(
  fragments = frag_path, 
  cells = Cells(dat)
)

# Add to metadata
dat$fragments <- total_fragments[total_fragments$CB == colnames(dat), 
                                 "frequency_count"]

# Peak calling -----------------------------------------------------------------
# Installed macs2 using the R package Herper

# Path to miniconda
path_to_miniconda <-
  "/slipstream_old/home/thomasoleary/.local/share/r-miniconda" 
# Path to macs2 for CallPeaks function
macs2_path <- paste0(path_to_miniconda, 
                     "/envs/PeakCalling_analysis/bin/macs2")

# Call peaks
peaks <- CallPeaks(
  dat, 
  macs2.path = macs2_path,
  effective.genome.size = 1.25e8
)

# Create the peak matrix
peak_matrix <- FeatureMatrix(
  fragments = Fragments(dat),
  features = peaks
)

# Create a new assay using the MACS2 peak set and add it to the Seurat object
dat[["peaks"]] <- CreateChromatinAssay(
  counts = peak_matrix,
  fragments = frag_path,
  annotation = annotation
)

# Calculate fraction of reads in peaks
dat <- FRiP(
  dat,
  assay = "peaks", 
  total.fragments = "fragments"
)

# Fraction of reads in TSS region ----------------------------------------------

# Get the TSS sites
TSS_sites <- GetTSSPositions(
  dat@assays$ATAC@annotation
)

# # Get the 2000 bp upstream and 200 bp downstream region with promoter
# # This is how TSS was defined in Calderon et al. 2022
promoter_region <- promoters(TSS_sites)

dat$nCount_TSS <- CountsInRegion(
  dat, 
  assay = "ATAC", 
  regions = TSS_sites
)

dat$nCount_promoter <- CountsInRegion(
  dat, 
  assay = "ATAC", 
  regions = promoter_region
)

# Calculate
dat$FRiT <- dat$nCount_TSS/dat$fragments

# Save seurat object with added info
saveRDS(dat, here::here("data/processed/seurat_object/06_dat_qc.rds"))
07_cluster.R
Code
# ------------------------------------------------------------------------------
# Dimension reduction and clustering
# May 30, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)
library(clustree)

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/06_dat_qc.rds")
)

# Joint Clustering based on Signac tutorial ------------------------------------
# https://stuartlab.org/signac/articles/pbmc_multiomic.html

# RNA data processing
DefaultAssay(dat) <- "RNA"
dat <- SCTransform(dat)
dat <- RunPCA(dat)

# ATAC data processing
DefaultAssay(dat) <- "peaks"
dat <- FindTopFeatures(dat, min.cutoff = 5)
dat <- RunTFIDF(dat)
dat <- RunSVD(dat)

# The first lsi component often captures sequencing depth rather than biological 
# variation so we should not use this first component in further analysis.
# See plot below for ~-1 corr with depth and the first component
DepthCor(dat)

# Build a joint neighbor graph using both assays
dat <- FindMultiModalNeighbors(
  object = dat,
  reduction.list = list("pca", "lsi"), 
  dims.list = list(1:50, 2:40),
  verbose = TRUE
)

# Build a joint UMAP
dat <- RunUMAP(
  object = dat,
  nn.name = "weighted.nn",
  assay = "RNA",
  verbose = TRUE
)

# Build a joint t-SNE
dat <- RunTSNE(
  object = dat,
  nn.name = "weighted.nn",
  assay = "RNA",
  verbose = TRUE
)

# Find Clusters based on the LSI
dat <- FindClusters(dat, 
                    resolution = 0.2,
                    graph.name = "wknn", 
                    algorithm = 3)

# Save data
saveRDS(dat, here::here("data/processed/seurat_object/07_dat_cluster.rds"))

################################################################################
# To see how and if the dimensional reduction of only ATAC or RNA libraries 
# looks different -- save these data below

# RNA-only dim-reduction -------------------------------------------------------
# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/07_dat_cluster.rds")
)

# RNA data processing
DefaultAssay(dat) <- "RNA"
dat <- SCTransform(dat)
dat <- RunUMAP(RunPCA(dat), dims = c(1:50))

# Save dat with RNA only UMAP
saveRDS(dat,
  here::here("data/processed/seurat_object/07_dat_rna_umap.rds")
)

# ATAC-only dim-reduction ------------------------------------------------------
# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/07_dat_cluster.rds")
)
 
# ATAC UMAP quick
DefaultAssay(dat) <- "peaks"
dat <- dat |> 
  ScaleData() |>
  RunPCA() |>
  RunUMAP(dims = c(1:50))

# Save dat with ATAC only UMAP
saveRDS(dat,
  here::here("data/processed/seurat_object/07_dat_atac_umap.rds")
)
08_cluster_markers.R
Code
# ------------------------------------------------------------------------------
# Differential expression and accessibility
# June 13, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)

# Load data
dat <- readRDS(here::here("data/processed/seurat_object/07_dat_cluster.rds"))

# Find markers of each cluster that are conserved between the two conditions ---

# Create empty list to populate in the for loop
markers <- list()

# Loop through each cluster individually
for (i in levels(dat)) {
  markers[[i]] <- FindConservedMarkers(dat, 
                                       ident.1 = i,
                                       grouping.var = "acc_temp",
                                       only.pos = TRUE) |> 
    rownames_to_column("gene")
}

# Combine markers for all clusters
markers <- bind_rows(markers, 
                     .id = "cluster")

# Number of markers per cluster
markers %>%
  filter(max_pval < 0.05) |> 
  group_by(cluster) |> 
  tally()

# Save markers object
saveRDS(markers, here::here("data/processed/genes/markers.rds"))
09_cluster_annotation.R
Code
# ------------------------------------------------------------------------------
# Annotating Seurat Clusters using Fisher's enrichment of BDGP in situ data
# June 12, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)

# Load data --------------------------------------------------------------------

# Berkeley Drosophila Genome Project in situ database
# https://insitu.fruitfly.org/insitu-mysql-dump/insitu_annot.csv.gz
insitu_annot <- read_csv(here::here("data/raw/annot/insitu_annot.csv"))

# Cluster annotations from Calderon data at 4 to 6 hours 
cluster_annot_calderon <- read_csv(
  here::here("data/raw/annot/calderon_cluster_annot.csv")) |>
  filter(`time window (modeled)` == "4-6") |>
  dplyr::rename(cluster = `cluster #`) |>
  dplyr::select(cluster, `selected annotation (manual)`) |> 
  distinct(`selected annotation (manual)`)

# Marker genes from the Seurat clusters -- all markers with pval < 0.05
markers <- readRDS(here::here("data/processed/genes/markers.rds")) |> 
  group_by(cluster) |> 
  filter(max_pval < 0.05)


# Fisher's Exact Test for enrichment of cell-type specific marker genes --------

# Reference panel of a specific level of the in situ data
ref_panel <- insitu_annot
cluster_annot <- NULL

pb <- progress::progress_bar$new(
  total = length(unique(markers$cluster)) * length(unique(ref_panel$annot)))
# Loop through each cluster present in the query markers data
for (cluster_i in unique(markers$cluster)) {
  # Unique markers for a cluster
  query_markers <- markers |> 
    filter(cluster == cluster_i) |> 
    ungroup() |> 
    distinct(gene) |> 
    deframe()
  
  # Loop through each annotation type in the reference
  for (ref_annot_j in unique(ref_panel$annot)) {
    
    # Distinct reference markers for this annotation
    ref_markers <- ref_panel |> 
      filter(annot == ref_annot_j) |> 
      distinct(gene) |> 
      deframe()
    
    # Collect numbers for the Fisher's exact test contingency table ------------
    
    # Number of markers in common
    n_common <- length(intersect(query_markers, ref_markers))
    # Number unique to the query
    n_query_unique <- length(setdiff(query_markers, ref_markers))
    # Number unique to the reference
    n_ref_unique <- length(setdiff(ref_markers, query_markers))     
    # Number remaining
    n_remain <- length(unique(c(unique(markers$gene), rownames(ref_panel)))) - 
      n_common - n_query_unique - n_ref_unique
    
    # Proportion of query markers that are shared with the reference
    prop_overlap <- n_common/length(query_markers)
    
    # Contingency table
    con_table <- matrix(
      c(n_common, 
        n_query_unique, 
        n_ref_unique, 
        n_remain), 
      ncol = 2)
    
    # Run the Fisher's exact test and save the p-value
    fet_pval <- fisher.test(con_table)$p.value
    
    # Store results
    df <- tibble(
      cluster = cluster_i,
      annot = ref_annot_j,
      prop_overlap = prop_overlap,
      pval = fet_pval
    )
    
    # Bind results together into a dataframe
    cluster_annot <- bind_rows(df, cluster_annot)
    
    # Print progress bar
    pb$tick()
  }
}

pb$terminate()

# Adjusted pval for FDR correction
cluster_annot <- cluster_annot %>%
  mutate(padj = p.adjust(pval, method = "BH"))

# Save the full annotation results
saveRDS(cluster_annot, here::here("data/processed/annot/cluster_annot_all.rds"))

# Manual annotations by TSO using the 11 annotations present in at 4 to 6 hours
# of development in the Calderon data
cluster_annot_manual <- c(
  "0" = "ectoderm primordium",
  "1" = "mesoderm primordium",
  "2" = "ventral nerve cord primordium",
  "3" = "endoderm primordium",
  "4" = "ectoderm primordium",
  "5" = "muscle system primordium",
  "6" = "tracheal primordium",
  "7" = "ventral nerve cord primordium",
  "8" = "plasmatocytes anlage",
  "9" = "amnioserosa",
  "10" = "unknown"
)

# Add in the consensus results
cluster_annot_manual <- cluster_annot |> 
  filter(padj < 0.05) |> 
  mutate(cluster = as.numeric(cluster)) |> 
  group_by(cluster) |> 
  slice_min(padj, n = 3) |> 
  summarise(top_3 = paste0(annot, collapse = "; ")) |> 
  mutate(annot_manual = cluster_annot_manual) |> 
  select(cluster, annot_manual, top_3)

# Save the consensus results
saveRDS(
  cluster_annot_manual, 
  here::here("data/processed/annot/cluster_annot_manual.rds")
)
10_link_peaks.R
Code
# ------------------------------------------------------------------------------
# Link peaks to genes
# June 06, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)
library(BSgenome.Dmelanogaster.UCSC.dm6)

# Load data
dat <- readRDS(here::here("data/processed/seurat_object/07_dat_cluster.rds"))
seqlevelsStyle(BSgenome.Dmelanogaster.UCSC.dm6) <- "NCBI"

# Set assay
DefaultAssay(dat) <- "peaks"

# Compute the GC content for each peak
dat <- RegionStats(
  dat, 
  genome = BSgenome.Dmelanogaster.UCSC.dm6
)

# Link peaks to genes
dat <- LinkPeaks(
  object = dat,
  peak.assay = "peaks",
  expression.assay = "SCT"
)

# Save data
saveRDS(dat, here::here("data/processed/seurat_object/10_dat_linked.rds"))
11_pseudobulk_DESeq2.R
Code
# ------------------------------------------------------------------------------
# Pseudobulk analysis
# June 08, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
require(tidyverse)
require(Seurat)
require(Signac)
require(DESeq2)

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/06_dat_qc.rds")
)

# Aggregate samples data into counts matrix
counts <- AggregateExpression(dat, 
                              group.by = "sample_name",
                              assays = "RNA",
                              slot = "counts",
                              return.seurat = FALSE)[[1]]

# Create metadata for counts matrix
metadata <- tibble(
  samples = c("18C_Rep1","18C_Rep2","25C_Rep1","25C_Rep1"),
  acc_temp = as.factor(c("18C", "18C", "25C", "25C"))
)

# Create DESeq Data Set
dds <- DESeqDataSetFromMatrix(countData = counts,
                              colData = metadata,
                              design = ~ acc_temp)

# Filter out genes with low counts across the four samples
keep_genes <- rowSums(counts(dds)) >= 10
dds <- dds[keep_genes,]

# Run DESeq2
dds <- DESeq(dds)

# Generate results object
res <- results(dds, name = "acc_temp_25C_vs_18C")
deg_res <- res |>
  as_tibble(rownames = "gene")
degs <- res |>
  as_tibble(rownames = "gene") |>
  dplyr::filter(padj< 0.05) |>
  arrange(padj)

# Save results
saveRDS(res, here::here("output/degs/pseudobulk_DESeq_res.rds"))
saveRDS(degs, here::here("output/degs/pseudobulk_DESeq.rds"))
write_csv(degs |> select(gene), here::here("output/degs/pseudobulk_DESeq_gene_names.csv"))
pheno.R
Code
# ------------------------------------------------------------------------------
# Canton S acclimation phenotype figure
# March 31, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Description -----
# Canton S phenotype figure

# Load libraries
library(tidyverse)

# Load data
dat <- read_csv(here::here("data/raw/pheno/embryo_acc_pheno.csv"))

# Filter only Canton S data
dat <- dat |>
  filter(Genotype == "CantonS") |>
  filter(Stage == "Early")

# Count the number of eggs per acclimation treatment
dat |>
  group_by(Acclimation) |>
  summarise(total_eggs = sum(Number_Eggs))

# Plot Canton S Survival
dat |>
  mutate(Acclimation = paste0(Acclimation, "°C")) |>
  ggplot(aes(x = Acclimation,
             y = Survival*100)) +
  geom_boxplot(width = 0.3,
               color = "grey20",
               outlier.shape = NA) +
  ggbeeswarm::geom_beeswarm(size = 3,
                            cex = 2,
                            shape = 21,
                            fill = "grey50",
                            color = "grey20",
                            alpha = 0.9) +
  xlab("Acclimation temperature") +
  scale_y_continuous(name = "Survival after acute heat shock",
                     limits = c(0, 100),
                     expand = expansion(mult = c(0, 0.05)),
                     labels = function(x) paste0(x, "%")) + 
  cowplot::theme_minimal_hgrid(font_family = "Myriad Pro")


# Save two sizes
ggsave(here::here("output/figs/pheno/cantonS_survival.png"), 
       height = 16, 
       width = 24,
       units = "cm")
ggsave(here::here("output/figs/pheno/cantonS_survival_small.png"), 
       height = 12, 
       width = 18,
       units = "cm")
qc.R
Code
# ------------------------------------------------------------------------------
# Knee plot figures
# March 27, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Description -----
# Basic quality control metrics and filtering out non-cell barcodes

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)

# Load raw data
dat_raw <- readRDS(
  here::here("data/processed/seurat_object/00_dat_raw.rds")
)

# Load 10X Cell Ranger ARC called cells
dat_10x <- readRDS(
  here::here("data/processed/seurat_object/00_dat_10x_cells.rds")
)

# Load filtered cells - mutiplets removed
dat <- readRDS(
  here::here("data/processed/seurat_object/05_dat_filtered.rds")
)

# Define count cutoffs
low_ATAC <- 800
low_RNA <- 200

# Knee-plot for ATAC -----------------------------------------------------------
p1 <- tibble(nCount_ATAC = sort(dat_raw$nCount_ATAC,
                          decreasing = TRUE)) |>
  rownames_to_column("rank") |>
  mutate(rank = as.numeric(rank)) |>
  head(100000) |>
  ggplot() +
  geom_point(aes(x = rank,
                 y = nCount_ATAC),
             color = "grey50",
             shape = 21) +
  geom_hline(aes(yintercept = low_ATAC),
             linetype = 2,
             color = "grey20",
             alpha = 0.8,
             linewidth = 1.1) +
  scale_y_continuous(trans = "log10", 
                     breaks = c(100, 1000, 10000, 100000),
                     labels = c("100", "1k", "10k", "100k"),
                     name = "ATAC counts") +
  scale_x_continuous(trans = "log10",
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "Barcodes in rank order") +
  cowplot::theme_minimal_grid()

# Knee-plot for RNA ------------------------------------------------------------
p2 <- tibble(nCount_RNA = sort(dat_raw$nCount_RNA,
                         decreasing = TRUE)) |>
  rownames_to_column("rank") |>
  mutate(rank = as.numeric(rank)) |>
  head(100000) |>
  ggplot() +
  geom_point(aes(x = rank,
                 y = nCount_RNA),
             color = "grey50",
             shape = 21) +
  geom_hline(aes(yintercept = low_RNA),
             linetype = 2,
             color = "grey20",
             alpha = 0.8,
             linewidth = 1.1) +
  scale_y_continuous(trans = "log10", 
                     breaks = c(100, 1000, 10000, 100000),
                     labels = c("100", "1k", "10k", "100k"),
                     name = "RNA counts") +
  scale_x_continuous(trans = "log10",
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "Barcodes in rank order") +
  cowplot::theme_minimal_grid()

# Knee plots together
cowplot::plot_grid(p1, p2,
                   nrow = 2)

# Save knee plot
ggsave(here::here("output/figs/qc/knee_plots.pdf"),
       height = 15,
       width = 15,
       units = "cm")
ggsave(here::here("output/figs/qc/knee_plots.png"),
       height = 15,
       width = 15,
       units = "cm")

# Scatter plot of ATAC & RNA counts --------------------------------------------

# Cells called by Cell Ranger ARC
dat_raw@meta.data |>
  filter(nCount_RNA >= 1 &
           nCount_ATAC >= 1) |>
  rownames_to_column("cells") |>
  mutate(cells_10x = ifelse(cells %in% Cells(dat_10x), 
                            "Cell Ranger ARC cells", 
                            "Non-cell barcodes")) |>
  mutate(cells_10x = factor(cells_10x,
                            levels = c("Non-cell barcodes",
                                       "Cell Ranger ARC cells"))) |>
  ggplot(aes(x = nCount_RNA,
             y = nCount_ATAC)) +
  geom_point(aes(color = cells_10x),
             shape = 21) +
  scale_x_continuous(trans = "log10", 
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "RNA counts") +
  scale_y_continuous(trans = "log10", 
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "ATAC counts") +
  scale_color_manual(values = c("grey90", "#91AB73"),
                     name = element_blank()) +
  cowplot::theme_minimal_grid() +
  theme(legend.position = "bottom")

# Save scatter plot
ggsave(here::here("output/figs/qc/scatter_rna_atac_10x_cells.pdf"),
       height = 20,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/scatter_rna_atac_10x_cells.png"),
       height = 20,
       width = 20,
       units = "cm")

# Cells called by Cell Ranger ARC & low count threshold
dat_raw@meta.data |>
  filter(nCount_RNA >= 1 &
           nCount_ATAC >= 1) |>
  rownames_to_column("cells") |>
  mutate(cells_10x = ifelse(cells %in% Cells(dat_10x) &
                              nCount_RNA >= low_RNA &
                              nCount_ATAC >= low_ATAC, 
                            "Cells", 
                            "Non-cell barcodes")) |>
  mutate(cells_10x = factor(cells_10x,
                            levels = c("Non-cell barcodes",
                                       "Cells"))) |>
  ggplot(aes(x = nCount_RNA,
             y = nCount_ATAC)) +
  geom_point(aes(color = cells_10x),
             shape = 21) +
  geom_segment(aes(x = low_RNA, xend = low_RNA,
                   y = low_ATAC, yend = 150000),
             linetype = 2,
             color = "grey50",
             alpha = 0.8,
             linewidth = 1.1) +
  geom_segment(aes(y = low_ATAC, yend = low_ATAC,
                   x = low_RNA, xend = 50000),
               linetype = 2,
               color = "grey50",
               alpha = 0.8,
               linewidth = 1.1) +
  scale_x_continuous(trans = "log10", 
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "RNA counts") +
  scale_y_continuous(trans = "log10", 
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "ATAC counts") +
  scale_color_manual(values = c("grey90", "#91AB73"),
                     name = element_blank()) +
  cowplot::theme_minimal_grid() +
  theme(legend.position = "bottom")

# Save scatter plot
ggsave(here::here("output/figs/qc/scatter_rna_atac_cells_threshold.pdf"),
       height = 20,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/scatter_rna_atac_cells_threshold.png"),
       height = 20,
       width = 20,
       units = "cm")

# Violin plot of QC metrics ----------------------------------------------------

# 10x cells
p1 <- dat_10x@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = nCount_RNA, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "RNA Counts",
                   name = element_blank()) +
  scale_y_continuous(labels = scales::label_number(suffix = "k", scale = 1e-3),
                     name = element_blank()) +
  theme_classic()

p2 <- dat_10x@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = nCount_ATAC, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "ATAC Counts",
                   name = element_blank()) +
  scale_y_continuous(labels = scales::label_number(suffix = "k", scale = 1e-3),
                     name = element_blank()) +
  theme_classic()

p3 <- dat_10x@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = TSS.enrichment, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "TSS Enrichment",
                   name = element_blank()) +
  ylab(element_blank()) +
  theme_classic()

p4 <- dat_10x@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = nucleosome_signal, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "Nucleosome signal",
                   name = element_blank()) +
  ylab(element_blank()) +
  theme_classic()

p5 <- dat_10x@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = percent.mt/100, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "Percent mitochondrial reads",
                   name = element_blank()) +
  scale_y_continuous(name = element_blank(),
                     labels = scales::percent_format(accuracy = 1)) +
  theme_classic()

# Combine all plots together
cowplot::plot_grid(p2, p3, p4, p1, p5,
                   nrow = 2)

# Save plot
ggsave(here::here("output/figs/qc/violin_qc_metrics.pdf"),
       height = 15,
       width = 30,
       units = "cm")
ggsave(here::here("output/figs/qc/violin_qc_metrics.png"),
       height = 15,
       width = 30,
       units = "cm")
qc_cells.R
Code
# ------------------------------------------------------------------------------
# Plots for Quality Control metrics after cell calling
# May 25, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
require(tidyverse)

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)

# Load raw data
dat_raw <- readRDS(
  here::here("data/processed/seurat_object/00_dat_raw.rds")
)

# Load filtered cells - mutiplets removed
dat <- readRDS(
  here::here("data/processed/seurat_object/06_dat_qc.rds")
)

# Define count cutoffs
low_ATAC <- 800
low_RNA <- 200

# Cells called by Cell Ranger ARC & low count threshold
dat_raw@meta.data |>
  filter(nCount_RNA >= 1 &
           nCount_ATAC >= 1) |>
  rownames_to_column("cells") |>
  mutate(cells_10x = ifelse(cells %in% Cells(dat) &
                              nCount_RNA >= low_RNA &
                              nCount_ATAC >= low_ATAC, 
                            "Cells", 
                            "Non-cell barcodes")) |>
  mutate(cells_10x = factor(cells_10x,
                            levels = c("Non-cell barcodes",
                                       "Cells"))) |>
  ggplot(aes(x = nCount_RNA,
             y = nCount_ATAC)) +
  geom_point(aes(color = cells_10x),
             shape = 21) +
  geom_segment(aes(x = low_RNA, xend = low_RNA,
                   y = low_ATAC, yend = 150000),
               linetype = 2,
               color = "grey50",
               alpha = 0.8,
               linewidth = 1.1) +
  geom_segment(aes(y = low_ATAC, yend = low_ATAC,
                   x = low_RNA, xend = 50000),
               linetype = 2,
               color = "grey50",
               alpha = 0.8,
               linewidth = 1.1) +
  scale_x_continuous(trans = "log10", 
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "RNA counts") +
  scale_y_continuous(trans = "log10", 
                     breaks = c(1, 10, 100, 1000, 10000, 100000),
                     labels = c("1", "10", "100", "1k", "10k", "100k"),
                     name = "ATAC counts") +
  scale_color_manual(values = c("grey90", "#91AB73"),
                     name = element_blank()) +
  cowplot::theme_minimal_grid() +
  theme(legend.position = "bottom")

# Save scatter plot
ggsave(here::here("output/figs/qc/scatter_rna_atac_final_filter.pdf"),
       height = 20,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/scatter_rna_atac_final_filter.png"),
       height = 20,
       width = 20,
       units = "cm")

# Violin plot after filtering
p1 <- dat@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = nCount_RNA, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "RNA Counts",
                   name = element_blank()) +
  scale_y_continuous(labels = scales::label_number(suffix = "k", scale = 1e-3),
                     name = element_blank()) +
  theme_classic()

p2 <- dat@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = nCount_ATAC, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "ATAC Counts",
                   name = element_blank()) +
  scale_y_continuous(labels = scales::label_number(suffix = "k", scale = 1e-3),
                     name = element_blank()) +
  theme_classic()

p3 <- dat@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = TSS.enrichment, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "TSS Enrichment",
                   name = element_blank()) +
  ylab(element_blank()) +
  theme_classic()

p4 <- dat@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = nucleosome_signal, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "Nucleosome signal",
                   name = element_blank()) +
  ylab(element_blank()) +
  theme_classic()

p5 <- dat@meta.data |>
  mutate(project = "heater") |>
  ggplot() +
  geom_violin(aes(y = percent.mt/100, 
                  x = project),
              fill = "grey80",
              color = "grey20") +
  scale_x_discrete(label = "Percent mitochondrial reads",
                   name = element_blank()) +
  scale_y_continuous(name = element_blank(),
                     labels = scales::percent_format(accuracy = 1)) +
  theme_classic()

# Combine all plots together
cowplot::plot_grid(p2, p3, p4, p1, p5,
                   nrow = 2)

# Save plot
ggsave(here::here("output/figs/qc/violin_qc_metrics_post_filter.pdf"),
       height = 15,
       width = 30,
       units = "cm")
ggsave(here::here("output/figs/qc/violin_qc_metrics_post_filter.png"),
       height = 15,
       width = 30,
       units = "cm")

# TSS Enrichment plot for each sample ------------------------------------------
TSSPlot(dat, 
        group.by = "sample_name") +
  scale_color_manual(values = rep("grey50",4)) +
  cowplot::theme_cowplot() +
  theme(legend.position = "none")


# Save plot
ggsave(here::here("output/figs/qc/tss_enrichment_samples.pdf"),
       height = 15,
       width = 25,
       units = "cm")
ggsave(here::here("output/figs/qc/tss_enrichment_samples.png"),
       height = 15,
       width = 25,
       units = "cm")

# # Fragment histogram for each sample -------------------------------------------
# FragmentHistogram(dat, 
#                   group.by = "sample_name")
# 
# # Save plot
# ggsave(here::here("output/figs/qc/fragment_hist.pdf"),
#        height = 15,
#        width = 25,
#        units = "cm")
# ggsave(here::here("output/figs/qc/fragment_hist.png"),
#        height = 15,
#        width = 25,
#        units = "cm")

# FRiP
dat@meta.data |>
  group_by(sample_name) |>
  summarise(FRiP = median(FRiP))  |>
  mutate(sample_name = factor(sample_name, levels = c(c("25C_Rep2",
                                                        "25C_Rep1",
                                                        "18C_Rep2",
                                                        "18C_Rep1")))) |>
  ggplot(aes(x = FRiP,
             y = sample_name,
             label = round(FRiP, digits = 3))) +
  geom_col(color = "grey20",
           fill = "grey80",
           width = 0.8)+
  scale_x_continuous(expand = c(0, 0),
                     name = "Median per cell fraction of reads in peaks (FRiP)") +
  geom_label(nudge_x = -0.035) +
  scale_y_discrete(name = element_blank(),
                   labels = c("25°C Rep 2",
                              "25°C Rep 1",
                              "18°C Rep 2",
                              "18°C Rep 1")) +
  cowplot::theme_cowplot() 
 
# Save plot
ggsave(here::here("output/figs/qc/frip_samples.pdf"),
       height = 10,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/frip_samples.png"),
       height = 10,
       width = 20,
       units = "cm")

# FRiT
dat@meta.data |>
  group_by(sample_name) |>
  summarise(FRiT = median(FRiT))  |>
  mutate(sample_name = factor(sample_name, levels = c(c("25C_Rep2",
                                                        "25C_Rep1",
                                                        "18C_Rep2",
                                                        "18C_Rep1")))) |>
  ggplot(aes(x = FRiT,
             y = sample_name,
             label = round(FRiT, digits = 3))) +
  geom_col(color = "grey20",
           fill = "grey80",
           width = 0.8)+
  scale_x_continuous(expand = c(0, 0),
                     name = "Median per cell fraction of reads in TSS (FRiT)") +
  geom_label(nudge_x = -0.05) +
  scale_y_discrete(name = element_blank(),
                   labels = c("25°C Rep 2",
                              "25°C Rep 1",
                              "18°C Rep 2",
                              "18°C Rep 1")) +
  cowplot::theme_cowplot() 

# Save plot
ggsave(here::here("output/figs/qc/frit_samples.pdf"),
       height = 10,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/frit_samples.png"),
       height = 10,
       width = 20,
       units = "cm")
  
# Cells per sample
dat@meta.data |>
  group_by(sample_name) |>
  mutate(sample_name = factor(sample_name, levels = c(c("25C_Rep2",
                                                        "25C_Rep1",
                                                        "18C_Rep2",
                                                        "18C_Rep1")))) |>
  tally() |>
  ggplot(aes(x = n,
             y = sample_name,
             label = n)) +
  geom_col(color = "grey20",
           fill = "grey80",
           width = 0.8)+
  scale_x_continuous(expand = c(0, 0),
                     name = "Number of cells") +
  geom_label(nudge_x = -200) +
  scale_y_discrete(name = element_blank(),
                   labels = c("25°C Rep 2",
                              "25°C Rep 1",
                              "18°C Rep 2",
                              "18°C Rep 1")) +
  cowplot::theme_cowplot() 

# Save plot
ggsave(here::here("output/figs/qc/cells_sample.pdf"),
       height = 10,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/cells_sample.png"),
       height = 10,
       width = 20,
       units = "cm")

# Median RNA and ATAC counts
dat@meta.data |>
  group_by(sample_name) |>
  summarise(nCount_RNA_median = median(nCount_RNA),
            nCount_ATAC_median = median(nCount_ATAC))  |>
  mutate(sample_name = factor(sample_name, levels = c(c("25C_Rep2",
                                                        "25C_Rep1",
                                                        "18C_Rep2",
                                                        "18C_Rep1")))) |>
  ggplot(aes(x = nCount_RNA_median,
             y = sample_name,
             label = nCount_RNA_median)) +
  geom_col(color = "grey20",
           fill = "grey80",
           width = 0.8)+
  scale_x_continuous(expand = c(0, 0),
                     name = "Median per cell RNA count") +
  geom_label(nudge_x = -75) +
  scale_y_discrete(name = element_blank(),
                   labels = c("25°C Rep 2",
                              "25°C Rep 1",
                               "18°C Rep 2",
                              "18°C Rep 1")) +
  cowplot::theme_cowplot() 

# Save plot
ggsave(here::here("output/figs/qc/median_rna_sample.pdf"),
       height = 10,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/median_rna_sample.png"),
       height = 10,
       width = 20,
       units = "cm")

# Median RNA and ATAC counts
dat@meta.data |>
  group_by(sample_name) |>
  summarise(nCount_RNA_median = median(nCount_RNA),
            nCount_ATAC_median = median(nCount_ATAC))  |>
  mutate(sample_name = factor(sample_name, levels = c(c("25C_Rep2",
                                                        "25C_Rep1",
                                                        "18C_Rep2",
                                                        "18C_Rep1")))) |>
  ggplot(aes(x = nCount_ATAC_median,
             y = sample_name,
             label = nCount_ATAC_median)) +
  geom_col(color = "grey20",
           fill = "grey80",
           width = 0.8)+
  scale_x_continuous(expand = c(0, 0),
                     name = "Median per cell ATAC count") +
  geom_label(nudge_x = -300) +
  scale_y_discrete(name = element_blank(),
                   labels = c("25°C Rep 2",
                              "25°C Rep 1",
                              "18°C Rep 2",
                              "18°C Rep 1")) +
  cowplot::theme_cowplot() 

# Save plot
ggsave(here::here("output/figs/qc/median_atac_sample.pdf"),
       height = 10,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/qc/median_atac_sample.png"),
       height = 10,
       width = 20,
       units = "cm")
multiplets.R
Code
# ------------------------------------------------------------------------------
# Plot marked multiplets onto the data
# May 23, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
require(tidyverse)

# Load data
dat <- readRDS(
  here::here("data/processed/seurat_object/05_dat_multiplets.rds")
  ) 

dat@meta.data <- dat@meta.data |>
  mutate(doublet = ifelse(doublet_amulet == "Multiplet" | 
                            doublet_finder == "Multiplet",
                          "Multiplet",
                          "Singlet"))

# Visualize multiplets on the UMAP projection ----------------------------------
DimPlot(dat, 
        group.by = "doublet") +
  labs(title = element_blank()) +
  scale_color_manual(name = element_blank(),
                     values = c("firebrick", "grey80")) +
  theme_void() +
  theme(legend.position = "bottom")

# Save the plot
ggsave(here::here("output/figs/qc/multiplet_umap.pdf"),
       width = 15,
       height = 15,
       units = "cm")
ggsave(here::here("output/figs/qc/multiplet_umap.png"),
       width = 15,
       height = 15,
       units = "cm")

# Counts mapped on to UMAP projection

# ATAC counts
p1 <- FeaturePlot(dat, 
        feature = "nCount_ATAC") +
  labs(title = element_blank()) +
  scale_color_gradient(name = "ATAC Counts",
                       low = "grey90", 
                       high = "darkgreen") +
  theme_void()

# RNA counts
p2 <- FeaturePlot(dat, 
            feature = "nCount_RNA") +
  scale_color_gradient(name = "RNA Counts",
                       low = "grey90", 
                       high = "darkgreen") +
  labs(title = element_blank()) +
  theme_void()

# Combine plots
cowplot::plot_grid(p1, p2, 
                   nrow = 1)

# Save plot
ggsave(here::here("output/figs/qc/counts_umap.pdf"),
       width = 25,
       height = 15,
       units = "cm")
ggsave(here::here("output/figs/qc/counts_umap.png"),
       width = 20,
       height = 15,
       units = "cm")

# Histogram out counts for RNA and ATAC libraries with multiplets --------------

# RNA count multiplet histogram
p1 <- dat@meta.data |>
  mutate(doublet = factor(doublet,
                          levels = c("Singlet", "Multiplet"))) |>
  ggplot() +
  geom_histogram(aes(x = nCount_RNA,
                     fill = doublet),
                 color = "grey30",
                 position = "dodge",
                 bins = 40) +
  scale_fill_manual(values = c("grey50", "firebrick"),
                    name = element_blank()) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(y = "Count", x = "RNA counts per barcode") +
  cowplot::theme_cowplot()

# ATAC count multiplet histogram
p2 <- dat@meta.data |>
  mutate(doublet = factor(doublet,
                          levels = c("Singlet", "Multiplet"))) |>
  ggplot() +
  geom_histogram(aes(x = nCount_ATAC,
                     fill = doublet),
                 color = "grey30",
                 position = "dodge",
                 bins = 40) +
  scale_fill_manual(values = c("grey50", "firebrick"),
                    name = element_blank()) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(y = "Count", x = "ATAC counts per barcode") +
  cowplot::theme_cowplot()


# Extract the legend from one of the plots
legend <- cowplot::get_legend(
  # Create some space to the left of the legend
  p1 + theme(legend.box.margin = margin(0, 0, 0, 12))
)

# Combine two plots without their legends
plots <- cowplot::plot_grid(p1 + theme(legend.position = "none"), 
                            p2 + theme(legend.position = "none"),
                            nrow = 2,
                            labels = c("A", "B"))

# Add the legend to the plots we made earlier
cowplot::plot_grid(plots, legend, rel_widths = c(2, .5))

# Save the plot
ggsave(here::here("output/figs/qc/multiplets_hist.pdf"),
       width = 20,
       height = 15,
       units = "cm")
ggsave(here::here("output/figs/qc/multiplets_hist.png"),
       width = 20,
       height = 15,
       units = "cm")

# Histogram out counts for RNA and ATAC libraries without multiplets -----------

# RNA count multiplet histogram
p1 <- dat@meta.data |>
  mutate(doublet = factor(doublet,
                          levels = c("Singlet", "Multiplet"))) |>
  filter(doublet == "Singlet" & nCount_RNA < 10000) |>
  ggplot() +
  geom_histogram(aes(x = nCount_RNA),
                 fill = "grey50",
                 color = "grey30",
                 position = "dodge",
                 bins = 30) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(y = "Count", x = "RNA counts per barcode") +
  cowplot::theme_cowplot()

# ATAC count multiplet histogram
p2 <- dat@meta.data |>
  mutate(doublet = factor(doublet,
                          levels = c("Singlet", "Multiplet"))) |>
  filter(doublet == "Singlet" & nCount_RNA < 10000) |>
  ggplot() +
  geom_histogram(aes(x = nCount_ATAC),
                 fill = "grey50",
                 color = "grey30",
                 position = "dodge",
                 bins = 30) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(y = "Count", x = "ATAC counts per barcode") +
  cowplot::theme_cowplot() 

# Combine two plots without their legends
cowplot::plot_grid(p1, 
                   p2,
                   nrow = 2,
                   labels = c("A", "B"))

# Save the plot
ggsave(here::here("output/figs/qc/singlets_only_hist.pdf"),
       width = 20,
       height = 15,
       units = "cm")
ggsave(here::here("output/figs/qc/singlets_only_hist.png"),
       width = 20,
       height = 15,
       units = "cm")
cluster.R
Code
# ------------------------------------------------------------------------------
# Cluster and expression plots
# May 16, 2023
# TS O'Leary
# ------------------------------------------------------------------------------

# Load libraries
library(tidyverse)
library(Seurat)
library(Signac)

# Load data
dat <- readRDS(here::here("data/processed/seurat_object/07_dat_cluster.rds"))
dat <- readRDS(here::here("data/processed/seurat_object/08_dat_linked.rds"))

# Quick bar plot counting the number of cells for each cluster -----------------
dat@meta.data |>
  ggplot(aes(x = seurat_clusters,
             fill = acc_temp)) +
  geom_bar(position = "dodge",
           color = "grey50") +
  labs(y = "Number of cells",
       x = "Cluster") +
  scale_fill_manual(name = element_blank(),
                    values = c("#43aa8b", "#f3722c")) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  cowplot::theme_minimal_hgrid()

# Save plot
ggsave(here::here("output/figs/cluster/cells_per_cluster.pdf"),
       height = 15,
       width = 30,
       units = "cm")
ggsave(here::here("output/figs/cluster/cells_per_cluster.png"),
       height = 15,
       width = 30,
       units = "cm")

# # Joint UMAP projection ------------------------------------------------------
# Acclimation temperature conditions plotted on top of each other 
DimPlot(dat, 
        group.by = "acc_temp") +
  scale_color_manual(name = element_blank(),
                    values = c("#43aa8b", "#f3722c")) +
  labs(title = element_blank()) +
  theme_void() +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(byrow = TRUE,
                              nrow = 1,
                              override.aes = list(size = 2)))

# Save plot
ggsave(here::here("output/figs/cluster/umap_18_25_overlay.pdf"),
       height = 20,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/cluster/umap_18_25_overlay.png"),
       height = 20,
       width = 20,
       units = "cm")

# Acclimation temperature split apart 
DimPlot(dat, 
        split.by = "acc_temp",
        label = TRUE,
        repel = TRUE,
        label.color = "grey20") +
  theme_void() +
  theme(legend.position = "bottom",
        strip.text = element_text(size = 16, face = "bold"),
        strip.background = element_rect(fill = "grey95", color = "grey95")
        ) +
  guides(color = guide_legend(byrow = TRUE,
                              nrow = 1,
                              override.aes = list(size = 2)))

# Save plot
ggsave(here::here("output/figs/cluster/umap_18_25_split.pdf"),
       height = 15,
       width = 30,
       units = "cm")
ggsave(here::here("output/figs/cluster/umap_18_25_split.png"),
       height = 15,
       width = 30,
       units = "cm")


# RNA only UMAP projection -----------------------------------------------------
# Load RNA UMAP
dat <- readRDS(
  here::here("data/processed/seurat_object/07_dat_rna_umap.rds")
)

# Acclimation temperature conditions plotted on top of each other 
DimPlot(dat, 
        group.by = "acc_temp") +
  scale_color_manual(name = element_blank(),
                     values = c("#43aa8b", "#f3722c")) +
  labs(title = element_blank()) +
  theme_void() +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(byrow = TRUE,
                              nrow = 1,
                              override.aes = list(size = 2)))

# Save plot
ggsave(here::here("output/figs/cluster/umap_18_25_overlay_rna.pdf"),
       height = 20,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/cluster/umap_18_25_overlay_rna.png"),
       height = 20,
       width = 20,
       units = "cm")

# Acclimation temperature split apart 
DimPlot(dat, 
        split.by = "acc_temp",
        label = TRUE,
        repel = TRUE,
        label.color = "grey20") +
  theme_void() +
  theme(legend.position = "bottom",
        strip.text = element_text(size = 16, face = "bold"),
        strip.background = element_rect(fill = "grey95", color = "grey95")
  ) +
  guides(color = guide_legend(byrow = TRUE,
                              nrow = 1,
                              override.aes = list(size = 2)))

# Save plot
ggsave(here::here("output/figs/cluster/umap_18_25_split_rna.pdf"),
       height = 15,
       width = 30,
       units = "cm")
ggsave(here::here("output/figs/cluster/umap_18_25_split_rna.png"),
       height = 15,
       width = 30,
       units = "cm")


# ATAC only UMAP projection ----------------------------------------------------
# Load ATAC UMAP
dat <- readRDS(
  here::here("data/processed/seurat_object/07_dat_atac_umap.rds")
)

# Acclimation temperature conditions plotted on top of each other 
DimPlot(dat, 
        group.by = "acc_temp") +
  scale_color_manual(name = element_blank(),
                     values = c("#43aa8b", "#f3722c")) +
  labs(title = element_blank()) +
  theme_void() +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(byrow = TRUE,
                              nrow = 1,
                              override.aes = list(size = 2)))

# Save plot
ggsave(here::here("output/figs/cluster/umap_18_25_overlay_atac.pdf"),
       height = 20,
       width = 20,
       units = "cm")
ggsave(here::here("output/figs/cluster/umap_18_25_overlay_atac.png"),
       height = 20,
       width = 20,
       units = "cm")

# Acclimation temperature split apart 
DimPlot(dat, 
        split.by = "acc_temp",
        label = TRUE,
        repel = TRUE,
        label.color = "grey20") +
  theme_void() +
  theme(legend.position = "bottom",
        strip.text = element_text(size = 16, face = "bold"),
        strip.background = element_rect(fill = "grey95", color = "grey95")
  ) +
  guides(color = guide_legend(byrow = TRUE,
                              nrow = 1,
                              override.aes = list(size = 2)))

# Save plot
ggsave(here::here("output/figs/cluster/umap_18_25_split_atac.pdf"),
       height = 15,
       width = 30,
       units = "cm")
ggsave(here::here("output/figs/cluster/umap_18_25_split_atac.png"),
       height = 15,
       width = 30,
       units = "cm")
Note

The code used for analysis is located on GitHub

1 Quality control

Criteria for filtering all barcodes to high-quality nuclei
  1. We used Cell Ranger ARC called cell barcodes – algorithm described here. 14,301 barcodes out of 2,215,183.

  2. Because the Cell Ranger ARC cell calling algorithm is very permissive to barcodes with very low counts (i.e., a minimum of a single count in each library), barcodes were additionally filtered to a low count threshold in both the ATAC and RNA libraries based on the clearly defined population of cells in the RNA and ATAC count scatterplot. Additionally, barcodes with more than 5% of RNA reads mapping to genes on the mitochondrial genome were excluded. A total of 886 barcodes were filtered out for 13,145 left.

  3. Multiplets were filtered out using two independent methods, relying on either the ATAC or RNA libraries to call multiplets. AMULET relies on the assumption that in snATAC-seq of diploids there should be at most two overlapping fragments with the same cell barcode. The presence of more than two overlapping fragments is a potential indication of a multiplet. Doublets were also identified using the RNA-seq libraries with DoubletFinder. After removing the 1,103 multiplet barcodes, there were still two barcodes that had substantially higher number of RNA reads after all filtering (i.e., n_Count_RNA around 12-13k when the next highest are around 6-7k). So I removed those two for a final number of 12,040 high quality nuceli barcodes.

1.1 Knee plot

Figure 1: Knee plot of the number of ATAC reads and RNA reads in the raw data.

1.2 Scatter plot of counts

1.3 Violin QC plots

1.4 Multiplets

Count histograms

1.5 QC metrics per sample

1.6 QC metrics on UMAP

2 Seurat

2.1 Clusters

Cells per cluster

2.1.1 Acclimation clusters

2.2 Annotation

2.2.1 Annotation results

cluster annot prop_overlap pval padj
0 ventral epidermis primordium 0.0532787 0.0000000 0.0000000
0 dorsal epidermis primordium 0.0532787 0.0000000 0.0000000
0 dorsal ectoderm primordium 0.0368852 0.0000002 0.0000159
0 embryonic dorsal epidermis 0.0491803 0.0000007 0.0000484
0 tracheal primordium 0.0245902 0.0000098 0.0004196
0 embryonic ventral epidermis 0.0409836 0.0000121 0.0005023
0 dorsal epidermis anlage 0.0122951 0.0000838 0.0026510
0 ventral ectoderm anlage in statu nascendi 0.0204918 0.0003854 0.0098415
0 subset 0.0245902 0.0005109 0.0121885
0 foregut primordium 0.0245902 0.0005463 0.0126547
0 clypeo-labral primordium 0.0163934 0.0006188 0.0141685
0 hindgut proper primordium 0.0286885 0.0008120 0.0179733
0 ventral ectoderm primordium 0.0204918 0.0010421 0.0218520
0 embryonic/larval tracheal system 0.0204918 0.0013134 0.0268347
0 embryonic epipharynx 0.0204918 0.0018848 0.0362357
0 ventral epidermis primordium P2 0.0081967 0.0020118 0.0379853
0 atrium 0.0163934 0.0026519 0.0465421
1 trunk mesoderm primordium 0.0585106 0.0000053 0.0002496
1 muscle system primordium 0.0265957 0.0001090 0.0033143
1 somatic muscle primordium 0.0265957 0.0004709 0.0115811
1 longitudinal visceral mesoderm primordium 0.0159574 0.0013004 0.0267054
2 ventral nerve cord primordium 0.0388350 0.0000317 0.0011599
2 brain primordium 0.0388350 0.0000503 0.0016699
2 ventral nerve cord primordium P3 0.0258900 0.0002285 0.0063667
2 ventral ectoderm primordium P2 0.0226537 0.0002856 0.0076374
2 no staining 0.0194175 0.0014403 0.0289806
2 embryonic brain 0.0388350 0.0020681 0.0388641
2 central brain primordium P3 0.0129450 0.0022350 0.0412232
3 posterior midgut primordium 0.0996785 0.0000000 0.0000000
3 anterior midgut primordium 0.0932476 0.0000000 0.0000000
3 posterior endoderm primordium 0.0707395 0.0000000 0.0000000
3 anterior endoderm primordium 0.0675241 0.0000000 0.0000000
3 embryonic midgut 0.0868167 0.0000000 0.0000000
3 embryonic/larval muscle system 0.0546624 0.0000000 0.0000000
3 trunk mesoderm primordium 0.0610932 0.0000000 0.0000002
3 posterior endoderm primordium P2 0.0450161 0.0000000 0.0000005
3 somatic muscle primordium 0.0353698 0.0000000 0.0000013
3 embryonic/larval visceral muscle 0.0385852 0.0000000 0.0000033
3 dorsal prothoracic pharyngeal muscle 0.0385852 0.0000002 0.0000130
3 dorsal pharyngeal muscle primordium 0.0257235 0.0000004 0.0000302
3 visceral muscle primordium 0.0289389 0.0000010 0.0000640
3 anterior endoderm anlage 0.0353698 0.0000011 0.0000647
3 embryonic/larval somatic muscle 0.0321543 0.0000011 0.0000650
3 muscle system primordium 0.0257235 0.0000012 0.0000718
3 embryonic anal pad 0.0289389 0.0000033 0.0001646
3 salivary gland primordium 0.0192926 0.0000039 0.0001888
3 head mesoderm primordium P2 0.0353698 0.0000113 0.0004780
3 head mesoderm primordium 0.0257235 0.0000225 0.0008548
3 cellular blastoderm 0.0385852 0.0000415 0.0014407
3 embryonic hindgut 0.0385852 0.0000448 0.0015240
3 maternal 0.1028939 0.0000890 0.0027922
3 salivary gland body primordium 0.0192926 0.0002808 0.0076106
3 longitudinal visceral mesoderm primordium 0.0128617 0.0004456 0.0110955
3 embryonic/larval fat body 0.0192926 0.0005423 0.0126349
3 foregut primordium 0.0192926 0.0018918 0.0362357
3 trunk mesoderm anlage 0.0064309 0.0019156 0.0365160
3 head mesoderm primordium P4 0.0225080 0.0021003 0.0392842
3 ubiquitous 0.0643087 0.0021180 0.0394311
4 ventral epidermis primordium 0.0437158 0.0000002 0.0000168
4 dorsal ectoderm primordium 0.0382514 0.0000036 0.0001738
4 dorsal epidermis primordium 0.0382514 0.0000095 0.0004122
4 tracheal primordium 0.0273224 0.0000335 0.0012120
4 embryonic ventral epidermis 0.0437158 0.0000569 0.0018588
4 embryonic dorsal epidermis 0.0437158 0.0001155 0.0034612
4 embryonic/larval tracheal system 0.0273224 0.0003611 0.0093414
4 subset 0.0273224 0.0009376 0.0204129
4 hindgut proper primordium 0.0327869 0.0009616 0.0205963
4 ventral epidermis primordium P2 0.0109290 0.0011416 0.0236881
4 embryonic foregut 0.0273224 0.0016159 0.0318710
5 muscle system primordium 0.0307692 0.0003132 0.0082638
5 visceral muscle primordium 0.0307692 0.0009039 0.0197870
5 longitudinal visceral muscle fibers 0.0153846 0.0015871 0.0314581
6 tracheal primordium 0.0508475 0.0000000 0.0000000
6 embryonic/larval tracheal system 0.0451977 0.0000001 0.0000129
6 dorsal epidermis primordium 0.0451977 0.0000006 0.0000439
6 dorsal ectoderm primordium 0.0395480 0.0000029 0.0001485
6 embryonic ventral epidermis 0.0508475 0.0000059 0.0002705
6 embryonic dorsal epidermis 0.0508475 0.0000133 0.0005317
6 ventral epidermis primordium 0.0338983 0.0000303 0.0011268
6 primary segmental branch specific anlage 0.0112994 0.0001209 0.0035668
6 embryonic salivary gland duct 0.0169492 0.0001680 0.0048494
6 visceral branch specific anlage 0.0112994 0.0001842 0.0052796
6 dorsal trunk specific anlage 0.0112994 0.0005672 0.0130612
6 dorsal epidermis anlage 0.0112994 0.0017218 0.0334610
6 embryonic salivary gland common duct 0.0112994 0.0019356 0.0367202
6 embryonic head epidermis 0.0282486 0.0023524 0.0425996
7 ventral nerve cord primordium P3 0.0448980 0.0000001 0.0000074
7 brain primordium 0.0571429 0.0000001 0.0000117
7 ventral nerve cord primordium 0.0530612 0.0000005 0.0000369
7 embryonic brain 0.0653061 0.0000008 0.0000506
7 procephalic ectoderm primordium 0.0367347 0.0000020 0.0001084
7 procephalic ectoderm anlage 0.0326531 0.0000030 0.0001555
7 dorsal ectoderm anlage in statu nascendi 0.0285714 0.0000067 0.0003047
7 ventral nerve cord anlage 0.0244898 0.0000131 0.0005264
7 ventral ectoderm primordium 0.0285714 0.0000131 0.0005264
7 procephalic ectoderm anlage in statu nascendi 0.0244898 0.0000395 0.0013943
7 ventral nerve cord 0.0489796 0.0002790 0.0076106
7 ventral ectoderm anlage in statu nascendi 0.0204082 0.0003926 0.0099626
7 head mesoderm anlage in statu nascendi 0.0122449 0.0004129 0.0104104
7 amnioserosa anlage in statu nascendi 0.0122449 0.0004578 0.0113295
7 ectoderm anlage in statu nascendi 0.0122449 0.0004815 0.0117678
7 ubiquitous 0.0734694 0.0004957 0.0119689
7 subset 0.0244898 0.0005220 0.0123785
7 cellular blastoderm 0.0367347 0.0005275 0.0123939
7 central brain primordium P3 0.0163265 0.0009581 0.0205963
7 head epidermis dorsal anlage in statu nascendi 0.0081633 0.0011976 0.0247220
7 anlage in statu nascendi 0.0163265 0.0016566 0.0325115
7 anterior endoderm anlage in statu nascendi 0.0122449 0.0024786 0.0441834
7 yolk nuclei 0.0204082 0.0024842 0.0441834
7 maternal 0.0938776 0.0025898 0.0458062
8 plasmatocytes anlage 0.0365854 0.0000000 0.0000000
8 head mesoderm primordium P2 0.0560976 0.0000000 0.0000000
8 yolk nuclei 0.0439024 0.0000000 0.0000000
8 amnioserosa 0.0414634 0.0000000 0.0000000
8 plasmatocytes 0.0219512 0.0000000 0.0000001
8 garland cell primordium 0.0219512 0.0000000 0.0000001
8 head mesoderm primordium P4 0.0365854 0.0000000 0.0000019
8 embryonic salivary gland 0.0243902 0.0000004 0.0000286
8 posterior endoderm primordium P2 0.0317073 0.0000009 0.0000579
8 embryonic/larval garland cell 0.0195122 0.0000012 0.0000720
8 maternal 0.1073171 0.0000013 0.0000752
8 salivary gland body primordium 0.0219512 0.0000031 0.0001562
8 ubiquitous 0.0756098 0.0000034 0.0001658
8 crystal cell 0.0146341 0.0000082 0.0003637
8 embryonic/larval fat body 0.0219512 0.0000085 0.0003702
8 anterior endoderm anlage 0.0268293 0.0000145 0.0005656
8 posterior endoderm primordium 0.0317073 0.0000236 0.0008887
8 procrystal cell 0.0121951 0.0000430 0.0014782
8 anterior endoderm primordium 0.0292683 0.0000697 0.0022403
8 embryonic midgut 0.0487805 0.0000829 0.0026429
8 head mesoderm anlage in statu nascendi 0.0097561 0.0001014 0.0031303
8 cellular blastoderm 0.0317073 0.0001462 0.0042511
8 posterior midgut primordium 0.0390244 0.0002203 0.0062258
8 salivary gland primordium 0.0121951 0.0002226 0.0062441
8 trunk mesoderm primordium 0.0317073 0.0004389 0.0109985
8 embryonic dorsal epidermis 0.0268293 0.0004901 0.0119068
8 apically cleared 0.0097561 0.0008015 0.0178385
8 mesoderm anlage in statu nascendi 0.0097561 0.0010420 0.0218520
8 anterior endoderm anlage in statu nascendi 0.0097561 0.0010801 0.0225286
8 dorsal epidermis primordium 0.0170732 0.0013450 0.0272322
8 anterior midgut primordium 0.0341463 0.0013466 0.0272322
8 trunk mesoderm primordium P2 0.0219512 0.0015033 0.0300668
8 posterior endoderm anlage in statu nascendi 0.0097561 0.0017201 0.0334610
8 embryonic/larval tracheal system 0.0146341 0.0024406 0.0439969
8 ventral epidermis primordium 0.0146341 0.0025984 0.0458062
9 amnioserosa 0.0416667 0.0000000 0.0000000
9 yolk nuclei 0.0321970 0.0000000 0.0000000
9 embryonic salivary gland 0.0284091 0.0000000 0.0000000
9 embryonic dorsal epidermis 0.0416667 0.0000000 0.0000001
9 embryonic ventral epidermis 0.0378788 0.0000000 0.0000003
9 salivary gland body primordium 0.0246212 0.0000000 0.0000006
9 maternal 0.1098485 0.0000000 0.0000012
9 dorsal ectoderm primordium 0.0227273 0.0000004 0.0000275
9 embryonic epipharynx 0.0227273 0.0000005 0.0000398
9 embryonic midgut 0.0530303 0.0000007 0.0000484
9 embryonic hypopharynx 0.0227273 0.0000009 0.0000597
9 embryonic proventriculus 0.0227273 0.0000013 0.0000746
9 embryonic/larval tracheal system 0.0208333 0.0000017 0.0000917
9 dorsal epidermis primordium 0.0227273 0.0000017 0.0000917
9 subset 0.0227273 0.0000021 0.0001106
9 head mesoderm primordium P2 0.0284091 0.0000045 0.0002118
9 posterior midgut primordium 0.0416667 0.0000059 0.0002705
9 salivary gland primordium 0.0132576 0.0000071 0.0003163
9 ventral epidermis primordium 0.0189394 0.0000128 0.0005244
9 posterior endoderm primordium P2 0.0246212 0.0000136 0.0005350
9 dorsal ectoderm anlage in statu nascendi 0.0170455 0.0000212 0.0008104
9 garland cell primordium 0.0113636 0.0000305 0.0011268
9 anterior midgut primordium 0.0378788 0.0000353 0.0012685
9 ventral ectoderm primordium 0.0170455 0.0000470 0.0015869
9 embryonic hindgut 0.0303030 0.0000495 0.0016569
9 anterior endoderm primordium 0.0265152 0.0000520 0.0017106
9 apoptotic amnioserosa 0.0075758 0.0000578 0.0018711
9 ubiquitous 0.0625000 0.0000939 0.0029234
9 embryonic proventriculus outer layer 0.0075758 0.0001035 0.0031718
9 embryonic head epidermis 0.0208333 0.0001128 0.0034036
9 hindgut proper primordium 0.0227273 0.0001184 0.0035205
9 anterior endoderm anlage 0.0208333 0.0001389 0.0040680
9 embryonic foregut 0.0189394 0.0001873 0.0053299
9 atrium 0.0132576 0.0002623 0.0072568
9 head mesoderm anlage in statu nascendi 0.0075758 0.0002658 0.0073031
9 posterior endoderm primordium 0.0246212 0.0002910 0.0077299
9 ventral ectoderm primordium P2 0.0170455 0.0003336 0.0086855
9 foregut primordium 0.0170455 0.0003729 0.0095849
9 embryonic/larval garland cell 0.0113636 0.0005089 0.0121885
9 trunk mesoderm anlage in statu nascendi 0.0075758 0.0005289 0.0123939
9 procephalic ectoderm anlage 0.0151515 0.0006406 0.0145830
9 tracheal primordium 0.0113636 0.0006581 0.0148964
9 procephalon primordium 0.0056818 0.0007305 0.0164421
9 embryonic salivary gland body 0.0094697 0.0007527 0.0168459
9 head mesoderm primordium P4 0.0189394 0.0009603 0.0205963
9 plasmatocytes anlage 0.0094697 0.0009879 0.0210473
9 no staining 0.0946970 0.0010414 0.0218520
9 trunk mesoderm primordium 0.0265152 0.0015094 0.0300668
9 gnathal primordium 0.0056818 0.0017378 0.0336094
9 ventral ectoderm anlage in statu nascendi 0.0113636 0.0021914 0.0406077
9 clypeolabrum primordium P2 0.0056818 0.0022465 0.0412449
9 trunk mesoderm primordium P2 0.0189394 0.0024784 0.0441834
10 maternal 0.2123711 0.0000000 0.0000000
10 ubiquitous 0.1567010 0.0000000 0.0000000
10 trunk mesoderm primordium 0.0597938 0.0000000 0.0000000
10 embryonic midgut 0.0762887 0.0000000 0.0000000
10 faint ubiquitous 0.0701031 0.0000000 0.0000000
10 embryonic/larval muscle system 0.0474227 0.0000000 0.0000000
10 anterior midgut primordium 0.0577320 0.0000000 0.0000000
10 posterior midgut primordium 0.0577320 0.0000000 0.0000001
10 cellular blastoderm 0.0474227 0.0000000 0.0000001
10 head mesoderm primordium 0.0309278 0.0000000 0.0000001
10 trunk mesoderm primordium P2 0.0371134 0.0000000 0.0000005
10 anterior endoderm primordium 0.0391753 0.0000000 0.0000008
10 posterior endoderm primordium 0.0371134 0.0000001 0.0000067
10 anterior endoderm anlage 0.0309278 0.0000001 0.0000067
10 dorsal prothoracic pharyngeal muscle 0.0309278 0.0000001 0.0000074
10 somatic muscle primordium 0.0247423 0.0000001 0.0000117
10 posterior endoderm primordium P2 0.0309278 0.0000002 0.0000148
10 embryonic/larval visceral muscle 0.0268041 0.0000006 0.0000409
10 head mesoderm primordium P4 0.0288660 0.0000009 0.0000597
10 embryonic/larval somatic muscle 0.0247423 0.0000014 0.0000811
10 head mesoderm primordium P2 0.0309278 0.0000016 0.0000885
10 salivary gland body primordium 0.0185567 0.0000119 0.0004975
10 strong ubiquitous 0.0164948 0.0000209 0.0008080
10 embryonic brain 0.0412371 0.0000369 0.0013135
10 fat body specific anlage 0.0082474 0.0000416 0.0014407
10 brain primordium 0.0288660 0.0002837 0.0076357
10 ventral nerve cord 0.0371134 0.0003220 0.0084401
10 hindgut proper primordium 0.0206186 0.0009010 0.0197870
10 longitudinal visceral mesoderm primordium 0.0082474 0.0022860 0.0417766
10 embryonic hindgut 0.0247423 0.0023353 0.0424833

2.2.2 Manual annotations

cluster annot_manual top_3
0 ectoderm primordium ventral epidermis primordium; dorsal epidermis primordium; dorsal ectoderm primordium
1 mesoderm primordium trunk mesoderm primordium; muscle system primordium; somatic muscle primordium
2 ventral nerve cord primordium ventral nerve cord primordium; brain primordium; ventral nerve cord primordium P3
3 endoderm primordium posterior midgut primordium; anterior midgut primordium; posterior endoderm primordium
4 ectoderm primordium ventral epidermis primordium; dorsal ectoderm primordium; dorsal epidermis primordium
5 muscle system primordium muscle system primordium; visceral muscle primordium; longitudinal visceral muscle fibers
6 tracheal primordium tracheal primordium; embryonic/larval tracheal system; dorsal epidermis primordium
7 ventral nerve cord primordium ventral nerve cord primordium P3; brain primordium; ventral nerve cord primordium
8 plasmatocytes anlage plasmatocytes anlage; head mesoderm primordium P2; yolk nuclei
9 amnioserosa amnioserosa; yolk nuclei; embryonic salivary gland
10 unknown maternal; ubiquitous; trunk mesoderm primordium
Warning

Cluster 11 is likely also unknown or ubiquitous, as it does not have an annotation that passes FDR.

3 Differential expression

3.1 Cluster

# A tibble: 11 × 3
   cluster annot_manual                  top_3                                  
     <dbl> <chr>                         <chr>                                  
 1       0 ectoderm primordium           ventral epidermis primordium; dorsal e…
 2       1 mesoderm primordium           trunk mesoderm primordium; muscle syst…
 3       2 ventral nerve cord primordium ventral nerve cord primordium; brain p…
 4       3 endoderm primordium           posterior midgut primordium; anterior …
 5       4 ectoderm primordium           ventral epidermis primordium; dorsal e…
 6       5 muscle system primordium      muscle system primordium; visceral mus…
 7       6 tracheal primordium           tracheal primordium; embryonic/larval …
 8       7 ventral nerve cord primordium ventral nerve cord primordium P3; brai…
 9       8 plasmatocytes anlage          plasmatocytes anlage; head mesoderm pr…
10       9 amnioserosa                   amnioserosa; yolk nuclei; embryonic sa…
11      10 unknown                       maternal; ubiquitous; trunk mesoderm p…

cluster gene p_val avg_log2FC pct.1 pct.2 p_val_adj
0 CG13427 0.0e+00 0.5600492 0.946 0.845 0.0000000
0 Cys 0.0e+00 0.4679929 0.904 0.793 0.0000000
0 CG10035 0.0e+00 0.6809323 0.716 0.536 0.0000000
0 Cirl 0.0e+00 -0.4516883 0.855 0.884 0.0000000
0 Brd 0.0e+00 -0.4915044 0.715 0.781 0.0000000
0 tsr 0.0e+00 0.4843932 0.535 0.368 0.0000000
0 GILT1 0.0e+00 0.9994714 0.169 0.066 0.0000000
0 Idgf6 0.0e+00 0.5737086 0.315 0.174 0.0000000
0 Bacc 0.0e+00 -0.2517485 0.957 0.969 0.0000000
0 Hr4 0.0e+00 0.9147276 0.138 0.052 0.0000000
0 mt:ND6 0.0e+00 -0.5953809 0.377 0.476 0.0000000
0 RpS5b 0.0e+00 0.4047089 0.552 0.406 0.0000000
0 pdm2 0.0e+00 0.5765832 0.167 0.074 0.0000000
0 cathD 0.0e+00 0.3857197 0.513 0.372 0.0000000
0 BobA 0.0e+00 -0.4079905 0.822 0.852 0.0000000
0 Obp99a 0.0e+00 -0.4510674 0.714 0.769 0.0000000
0 RpLP2 0.0e+00 0.2618524 0.872 0.795 0.0000000
0 klar 0.0e+00 0.2880296 0.734 0.616 0.0000000
0 CG2493 0.0e+00 0.3794513 0.267 0.158 0.0000000
0 CG4440 0.0e+00 -0.2892379 0.930 0.957 0.0000000
0 Ssdp 0.0e+00 0.4044493 0.339 0.223 0.0000000
0 sm 0.0e+00 0.3460863 0.581 0.454 0.0000000
0 CG1307 0.0e+00 0.4431711 0.172 0.090 0.0000000
0 chn 0.0e+00 0.3183634 0.679 0.566 0.0000000
0 CG5059 0.0e+00 0.3714354 0.195 0.109 0.0000000
0 CG6398 0.0e+00 0.3446696 0.519 0.389 0.0000000
0 nub 0.0e+00 0.4746086 0.227 0.137 0.0000000
0 Catsup 0.0e+00 0.3656290 0.203 0.115 0.0000000
0 RpL23A 0.0e+00 0.2526979 0.792 0.699 0.0000000
0 Spn 0.0e+00 0.4403441 0.264 0.167 0.0000000
0 sesB 0.0e+00 0.3242305 0.458 0.337 0.0000000
0 awd 0.0e+00 0.2966855 0.391 0.275 0.0000001
0 bou 0.0e+00 0.3201423 0.483 0.369 0.0000002
0 CG10077 0.0e+00 0.3177381 0.480 0.362 0.0000002
0 CG11127 0.0e+00 0.3595725 0.142 0.075 0.0000004
0 lilli 0.0e+00 0.3123143 0.428 0.317 0.0000011
0 Idgf2 0.0e+00 0.3508375 0.134 0.070 0.0000012
0 pre-lola-G 0.0e+00 0.3201367 0.507 0.394 0.0000015
0 CG4408 0.0e+00 0.3078729 0.113 0.055 0.0000027
0 InR 0.0e+00 0.2551987 0.515 0.398 0.0000029
0 nrm 0.0e+00 -0.6638009 0.418 0.486 0.0000080
0 CG10470 0.0e+00 0.3311102 0.226 0.144 0.0000082
0 Tlk 0.0e+00 0.2818009 0.344 0.246 0.0000097
0 DIP-alpha 0.0e+00 0.2632775 0.558 0.449 0.0000102
0 CG5532 0.0e+00 0.3024152 0.167 0.099 0.0000163
0 mbl 0.0e+00 0.7776044 0.189 0.122 0.0000176
0 CG5888 0.0e+00 0.2571891 0.308 0.210 0.0000196
0 Rtnl1 0.0e+00 0.2931867 0.462 0.354 0.0000337
0 CtsB1 0.0e+00 -0.2762803 0.666 0.693 0.0000425
0 CREG 0.0e+00 0.2887911 0.156 0.092 0.0000577
0 CG43736 0.0e+00 0.2682683 0.623 0.527 0.0000857
0 EcR 0.0e+00 0.2759130 0.371 0.277 0.0001433
0 CG15628 0.0e+00 0.2920605 0.127 0.071 0.0001448
0 ReepB 0.0e+00 0.2957091 0.285 0.202 0.0001587
0 CG3625 0.0e+00 0.2701941 0.346 0.253 0.0002275
0 RpL34a 0.0e+00 0.2513037 0.373 0.278 0.0003184
0 CG9953 0.0e+00 0.2502377 0.179 0.112 0.0003857
0 Fkbp14 0.0e+00 0.4918415 0.174 0.113 0.0004198
0 CG13551 0.0e+00 0.2807003 0.141 0.083 0.0005300
0 tws 1.0e-07 0.2565189 0.115 0.064 0.0008428
0 Aps 1.0e-07 0.3149364 0.128 0.074 0.0008879
0 Hsc70-3 1.0e-07 0.4156926 0.396 0.313 0.0009788
0 dnc 1.0e-07 -0.5879672 0.082 0.132 0.0011284
0 Myo10A 1.0e-07 -0.3726255 0.311 0.378 0.0014358
0 Neb-cGP 4.0e-07 0.2748693 0.246 0.174 0.0050797
0 EMC4 4.0e-07 0.2538167 0.254 0.182 0.0061067
0 Parp 5.0e-07 0.3176918 0.312 0.238 0.0069456
0 CG3036 6.0e-07 0.2800132 0.128 0.078 0.0078968
0 CG4194 6.0e-07 0.2534113 0.302 0.225 0.0084800
0 l(3)80Fj 1.1e-06 -0.3039318 0.485 0.526 0.0151807
0 CG14715 1.4e-06 0.2888570 0.219 0.155 0.0190207
0 tinc 2.1e-06 0.2714337 0.287 0.218 0.0296504
0 Spp 2.4e-06 0.2766302 0.210 0.150 0.0328441
0 wbl 2.6e-06 0.3532197 0.223 0.161 0.0366790
0 mt:ND3 2.6e-06 -0.3754520 0.288 0.334 0.0366791
1 CG13427 0.0e+00 1.5388423 0.739 0.340 0.0000000
1 osp 0.0e+00 -1.3174307 0.613 0.812 0.0000000
1 CG10035 0.0e+00 1.1497241 0.413 0.162 0.0000000
1 CG43759 0.0e+00 0.6889782 0.746 0.537 0.0000000
1 Cys 0.0e+00 0.6031655 0.756 0.556 0.0000000
1 rdo 0.0e+00 -1.0277144 0.309 0.475 0.0000000
1 Idgf6 0.0e+00 0.7915849 0.397 0.201 0.0000000
1 bnb 0.0e+00 0.6086054 0.545 0.311 0.0000000
1 robo2 0.0e+00 0.4502538 0.873 0.728 0.0000000
1 link 0.0e+00 0.6990752 0.276 0.116 0.0000000
1 Gas8 0.0e+00 -0.8025725 0.442 0.577 0.0000000
1 CG9650 0.0e+00 -0.5791487 0.519 0.669 0.0000000
1 retn 0.0e+00 0.6338401 0.462 0.272 0.0000000
1 CG43658 0.0e+00 0.4575916 0.718 0.553 0.0000000
1 ed 0.0e+00 0.3701806 0.858 0.781 0.0000000
1 Scgdelta 0.0e+00 -1.0087042 0.163 0.293 0.0000000
1 hth 0.0e+00 0.2971529 0.988 0.964 0.0000000
1 Hsc70-3 0.0e+00 0.7288367 0.264 0.126 0.0000000
1 phu 0.0e+00 -1.1779969 0.073 0.182 0.0000000
1 cathD 0.0e+00 0.4575337 0.582 0.376 0.0000000
1 pre-lola-G 0.0e+00 0.5676676 0.520 0.340 0.0000000
1 CG6040 0.0e+00 -1.1587814 0.129 0.253 0.0000000
1 Cirl 0.0e+00 -0.5160471 0.718 0.756 0.0000000
1 dlp 0.0e+00 0.5451768 0.248 0.115 0.0000000
1 GILT2 0.0e+00 0.7290838 0.145 0.047 0.0000000
1 Wnt6 0.0e+00 -0.7877542 0.276 0.399 0.0000000
1 tinc 0.0e+00 0.5298857 0.378 0.213 0.0000000
1 Svil 0.0e+00 -0.8495419 0.296 0.415 0.0000000
1 EMC4 0.0e+00 0.5713105 0.287 0.145 0.0000000
1 Ran 0.0e+00 0.4304019 0.453 0.265 0.0000000
1 Ten-m 0.0e+00 0.4382654 0.667 0.499 0.0000000
1 kmr 0.0e+00 0.4469308 0.452 0.285 0.0000000
1 Pka-C3 0.0e+00 -0.8321501 0.220 0.340 0.0000000
1 CycE 0.0e+00 0.4709506 0.326 0.180 0.0000000
1 CG34355 0.0e+00 -0.8719939 0.226 0.341 0.0000000
1 sdk 0.0e+00 0.4293131 0.529 0.351 0.0000001
1 CG5888 0.0e+00 0.5562232 0.260 0.131 0.0000001
1 CG42238 0.0e+00 0.3527556 0.774 0.622 0.0000001
1 vir-1 0.0e+00 -0.9425463 0.158 0.268 0.0000001
1 Tl 0.0e+00 0.7130511 0.259 0.137 0.0000002
1 CG6398 0.0e+00 0.4695036 0.288 0.154 0.0000002
1 Egfr 0.0e+00 0.3268104 0.757 0.605 0.0000002
1 Parp 0.0e+00 0.5982315 0.236 0.118 0.0000003
1 CadN 0.0e+00 0.3411816 0.826 0.721 0.0000003
1 Myo81F 0.0e+00 -0.7339499 0.334 0.434 0.0000003
1 LpR2 0.0e+00 -0.8022149 0.278 0.381 0.0000005
1 Dbp80 0.0e+00 0.5255974 0.416 0.261 0.0000005
1 dnt 0.0e+00 0.6148003 0.248 0.129 0.0000006
1 stg 0.0e+00 0.4622221 0.513 0.365 0.0000010
1 sm 0.0e+00 0.4385693 0.480 0.310 0.0000011
1 CG9953 0.0e+00 0.4958255 0.225 0.112 0.0000016
1 CG2493 0.0e+00 0.4585636 0.352 0.207 0.0000019
1 CG32982 0.0e+00 0.7913797 0.102 0.032 0.0000029
1 alpha4GT2 0.0e+00 -0.7165433 0.225 0.330 0.0000047
1 conu 0.0e+00 0.4283287 0.338 0.202 0.0000048
1 nuf 0.0e+00 0.3537159 0.649 0.490 0.0000050
1 out 0.0e+00 0.5631054 0.204 0.101 0.0000051
1 Fas3 0.0e+00 0.6010959 0.144 0.060 0.0000070
1 CG31221 0.0e+00 -0.7583848 0.248 0.349 0.0000071
1 rad 0.0e+00 -1.0590452 0.126 0.224 0.0000077
1 CG13917 0.0e+00 0.5423901 0.196 0.097 0.0000144
1 mirr 0.0e+00 0.6307822 0.237 0.130 0.0000158
1 ZnT77C 0.0e+00 0.5784871 0.153 0.067 0.0000166
1 Six4 0.0e+00 0.4638255 0.241 0.130 0.0000186
1 CG6701 0.0e+00 0.4951194 0.219 0.116 0.0000206
1 miple2 0.0e+00 0.3377908 0.691 0.537 0.0000219
1 ERp60 0.0e+00 0.3602825 0.241 0.127 0.0000227
1 if 0.0e+00 0.4561300 0.375 0.242 0.0000239
1 Idgf2 0.0e+00 0.4482989 0.180 0.085 0.0000242
1 CG4440 0.0e+00 0.3164160 0.796 0.693 0.0000260
1 CG13784 0.0e+00 0.3924220 0.259 0.141 0.0000324
1 lmd 0.0e+00 -1.0165089 0.122 0.213 0.0000418
1 KCNQ 0.0e+00 -0.8189087 0.180 0.273 0.0000455
1 Act5C 0.0e+00 0.3711427 0.455 0.304 0.0000465
1 Ilp4 0.0e+00 0.2895294 0.640 0.487 0.0000502
1 Spn 0.0e+00 0.3783396 0.255 0.142 0.0000550
1 ec 0.0e+00 0.3889931 0.278 0.160 0.0000583
1 RYa-R 0.0e+00 -0.7532783 0.232 0.327 0.0000637
1 SKIP 0.0e+00 0.3065673 0.776 0.630 0.0000695
1 Cam 0.0e+00 0.5268470 0.148 0.066 0.0000743
1 CG6891 0.0e+00 -0.9422046 0.099 0.184 0.0000761
1 Catsup 0.0e+00 0.4443763 0.173 0.083 0.0000777
1 CG8563 0.0e+00 -1.0038148 0.112 0.197 0.0000943
1 hang 0.0e+00 0.3050258 0.361 0.227 0.0001384
1 mt:ND6 0.0e+00 -0.5988134 0.322 0.404 0.0001474
1 gukh 0.0e+00 0.3220468 0.399 0.260 0.0001652
1 MYPT-75D 0.0e+00 0.4038993 0.520 0.381 0.0001720
1 stumps 0.0e+00 0.4488886 0.250 0.143 0.0001796
1 Pzl 0.0e+00 -0.6821574 0.185 0.275 0.0002467
1 CG10470 0.0e+00 0.4075757 0.249 0.143 0.0002962
1 CG3036 0.0e+00 0.4366808 0.124 0.052 0.0002965
1 bs 0.0e+00 -0.6773736 0.190 0.283 0.0003007
1 UQCR-11 0.0e+00 0.3693844 0.334 0.209 0.0003174
1 wech 0.0e+00 0.4012254 0.455 0.310 0.0003297
1 CG4408 0.0e+00 0.3978967 0.163 0.079 0.0003388
1 DOR 0.0e+00 0.4822032 0.132 0.059 0.0003738
1 CG17752 0.0e+00 -0.8540857 0.102 0.179 0.0006808
1 lola 0.0e+00 -0.3066308 0.889 0.895 0.0006898
1 CG32264 1.0e-07 0.3116203 0.313 0.195 0.0007031
1 fend 1.0e-07 0.4994655 0.163 0.082 0.0009205
1 sli 1.0e-07 0.3494291 0.271 0.162 0.0009218
1 RpS5b 1.0e-07 0.2860292 0.530 0.389 0.0009515
1 htl 1.0e-07 0.3825398 0.207 0.115 0.0011373
1 Dl 1.0e-07 0.2763906 0.837 0.785 0.0013857
1 CG1307 1.0e-07 0.3540359 0.229 0.131 0.0014168
1 cl 1.0e-07 0.4403837 0.125 0.056 0.0015491
1 CG14715 1.0e-07 0.3352900 0.208 0.114 0.0015633
1 smash 1.0e-07 0.3511273 0.196 0.106 0.0016583
1 CG3655 1.0e-07 0.2836765 0.562 0.410 0.0016624
1 ATPsynE 1.0e-07 0.3701592 0.208 0.116 0.0017487
1 tsr 1.0e-07 0.3295509 0.501 0.361 0.0019364
1 betaTub56D 1.0e-07 0.2691788 0.663 0.514 0.0019832
1 Aps 2.0e-07 0.4160563 0.120 0.053 0.0022432
1 zfh1 2.0e-07 0.3760996 0.509 0.378 0.0023076
1 ogre 2.0e-07 0.2926593 0.463 0.322 0.0029834
1 CG9220 4.0e-07 0.3714204 0.143 0.070 0.0049030
1 CG15012 4.0e-07 0.3116313 0.348 0.228 0.0054488
1 px 4.0e-07 0.2965067 0.586 0.442 0.0058142
1 CG30122 4.0e-07 0.3033960 0.437 0.303 0.0058315
1 RNASEK 5.0e-07 0.3055746 0.501 0.355 0.0064599
1 comm2 7.0e-07 0.3035660 0.249 0.153 0.0098647
1 nrv2 7.0e-07 0.2738411 0.227 0.134 0.0099974
1 shv 8.0e-07 0.3524068 0.209 0.122 0.0118467
1 tsh 9.0e-07 0.3094954 0.322 0.213 0.0128100
1 DIP-alpha 9.0e-07 0.2670935 0.521 0.382 0.0128691
1 KdelR 9.0e-07 0.3085526 0.303 0.194 0.0130264
1 Stlk 9.0e-07 0.3097630 0.131 0.064 0.0130335
1 sesB 1.0e-06 0.2993338 0.397 0.270 0.0145973
1 CG2865 1.4e-06 0.3042738 0.274 0.174 0.0192559
1 CG43163 1.5e-06 -0.7099876 0.187 0.260 0.0210507
1 Hs6st 1.5e-06 0.4506605 0.349 0.245 0.0211149
1 Rtnl1 1.9e-06 0.2519116 0.342 0.228 0.0268157
1 CG42762 2.0e-06 0.3507953 0.114 0.054 0.0273650
1 Gs2 2.3e-06 -0.9325743 0.074 0.134 0.0316877
1 noc 2.8e-06 0.3757280 0.226 0.139 0.0388266
1 klar 3.1e-06 0.2762723 0.617 0.482 0.0433475
1 CG4572 3.2e-06 0.3550648 0.121 0.060 0.0442030
1 bou 3.5e-06 0.3160806 0.276 0.179 0.0485619
2 CG13427 0.0e+00 1.1000824 0.792 0.588 0.0000000
2 CG10035 0.0e+00 1.0254688 0.556 0.351 0.0000000
2 Hr4 0.0e+00 1.2248561 0.245 0.097 0.0000000
2 RpS5b 0.0e+00 0.3832897 0.582 0.384 0.0000012
2 Idgf6 0.0e+00 0.4894564 0.299 0.168 0.0000706
2 RpL19 0.0e+00 0.3810138 0.846 0.716 0.0000726
2 RpL10 0.0e+00 0.3240600 0.858 0.776 0.0000729
2 bnb 0.0e+00 0.4484963 0.378 0.222 0.0000781
2 ft 0.0e+00 0.6675270 0.178 0.074 0.0000839
2 CG5532 0.0e+00 0.4095497 0.208 0.094 0.0001833
2 CG10470 0.0e+00 0.6003342 0.213 0.100 0.0001989
2 RpL36A 0.0e+00 0.2871229 0.816 0.685 0.0005177
2 Dad1 0.0e+00 0.4988110 0.327 0.191 0.0005421
2 RpS27A 1.0e-07 0.3484412 0.818 0.697 0.0010012
2 ed 2.0e-07 0.3412938 0.643 0.496 0.0021516
2 Bacc 2.0e-07 -0.4074966 0.979 0.972 0.0022710
2 RpL7 2.0e-07 0.2724327 0.776 0.624 0.0031043
2 mt:Cyt-b 2.0e-07 0.3371468 0.731 0.579 0.0032068
2 Cirl 3.0e-07 -0.5187494 0.716 0.729 0.0039538
2 RpL21 3.0e-07 0.2788101 0.836 0.722 0.0041580
2 eEF5 4.0e-07 0.2650421 0.854 0.754 0.0053201
2 cni 4.0e-07 0.3795292 0.322 0.193 0.0058629
2 fne 6.0e-07 0.7598458 0.192 0.097 0.0090775
2 CG1307 8.0e-07 0.3083638 0.231 0.125 0.0117474
2 RpS28b 9.0e-07 0.2796570 0.751 0.604 0.0125165
2 side 1.0e-06 -0.9381816 0.236 0.339 0.0133835
2 CREG 1.1e-06 0.4681062 0.109 0.040 0.0148548
2 chn 1.3e-06 0.4659088 0.192 0.097 0.0176422
2 sdt 1.4e-06 0.6405188 0.150 0.069 0.0193212
2 RpL17 1.7e-06 0.2531692 0.792 0.679 0.0231393
2 ATPsynCF6 1.8e-06 0.3163246 0.287 0.168 0.0246600
2 Parp 2.1e-06 0.5073335 0.273 0.165 0.0294565
2 grh 2.6e-06 0.5420280 0.151 0.071 0.0367412
2 awd 2.9e-06 0.2824276 0.446 0.302 0.0409742
2 l(1)sc 3.0e-06 0.3679244 0.133 0.059 0.0422781
2 TfIIFalpha 3.5e-06 0.4631746 0.165 0.081 0.0493593
3 CG13427 0.0e+00 1.6951286 0.798 0.429 0.0000000
3 CG10035 0.0e+00 1.1723723 0.427 0.151 0.0000000
3 Cys 0.0e+00 0.5813460 0.912 0.839 0.0000037
3 CG2852 0.0e+00 0.3157994 0.921 0.817 0.0000124
3 CG16791 0.0e+00 0.5553459 0.671 0.509 0.0004519
3 Cirl 0.0e+00 -0.4578244 0.735 0.783 0.0005113
3 CG4572 1.0e-07 0.6151116 0.146 0.049 0.0017553
3 ImpE2 2.0e-07 0.9703899 0.223 0.110 0.0030729
3 Hr4 3.0e-07 1.2661320 0.172 0.072 0.0036459
3 Idgf6 7.0e-07 0.5027167 0.431 0.282 0.0092876
3 Parp 7.0e-07 0.5555477 0.311 0.176 0.0104642
3 CG10470 1.1e-06 0.5585752 0.327 0.187 0.0152252
3 Shrm 1.2e-06 0.4370361 0.350 0.208 0.0168636
3 Zasp52 1.2e-06 0.9985217 0.100 0.028 0.0171718
3 Rtnl1 1.3e-06 0.4856608 0.455 0.302 0.0176023
3 Tet 1.9e-06 0.3498486 0.837 0.796 0.0267638
3 sm 2.0e-06 0.6583873 0.441 0.301 0.0277272
3 cathD 2.8e-06 0.4774750 0.522 0.380 0.0395015
3 osp 3.5e-06 -0.7763587 0.619 0.654 0.0483054
3 Ten-m 3.6e-06 0.5394852 0.434 0.295 0.0499165
4 CG13427 0.0e+00 0.8666367 0.879 0.765 0.0000000
4 Cirl 0.0e+00 -0.5261713 0.903 0.947 0.0000000
4 CG10035 0.0e+00 1.0411510 0.558 0.346 0.0000000
4 GILT1 1.0e-07 1.0212198 0.232 0.099 0.0008868
4 CG2493 1.0e-07 0.6848406 0.351 0.189 0.0009765
4 CG9628 1.0e-07 1.1401596 0.113 0.022 0.0013229
4 Bacc 3.0e-07 -0.3715391 0.956 0.978 0.0037790
4 RpL9 4.0e-07 0.3921901 0.817 0.705 0.0056201
4 Cys 4.0e-07 0.4400907 0.800 0.671 0.0061714
4 CG5532 1.5e-06 0.5957941 0.190 0.077 0.0205699
5 CG13427 0.0e+00 1.3384673 0.584 0.273 0.0000000
5 nmo 0.0e+00 0.5546769 0.944 0.848 0.0000000
5 hth 0.0e+00 0.4874238 0.929 0.843 0.0000034
5 Hsc70-3 0.0e+00 0.8858935 0.208 0.063 0.0000238
5 CG10035 0.0e+00 1.0101150 0.275 0.116 0.0000707
5 lmd 0.0e+00 -0.8572458 0.380 0.546 0.0000905
5 fax 0.0e+00 -0.5333784 0.738 0.814 0.0001061
5 rols 0.0e+00 -1.0087136 0.245 0.401 0.0001261
5 Parp 0.0e+00 0.7868533 0.227 0.087 0.0004359
5 Hr4 0.0e+00 1.2707828 0.212 0.080 0.0005638
5 bnb 2.0e-07 0.3649197 0.657 0.449 0.0023327
5 CG9650 2.0e-07 -0.5704536 0.710 0.771 0.0023422
5 CG10470 2.0e-07 0.7249847 0.187 0.065 0.0031768
5 rdgA 3.0e-07 0.9005706 0.307 0.159 0.0041041
5 RpL13A 3.0e-07 0.4236945 0.785 0.606 0.0044647
5 stg 5.0e-07 0.4611094 0.682 0.507 0.0072604
5 luna 7.0e-07 -0.5944999 0.567 0.693 0.0092767
5 Fas2 1.2e-06 1.2014407 0.139 0.043 0.0172339
5 Myc 1.3e-06 0.7343003 0.410 0.251 0.0176258
5 CG43163 1.8e-06 -1.2600116 0.045 0.133 0.0258059
5 Con 1.9e-06 -0.8393005 0.170 0.300 0.0269738
5 ab 2.2e-06 1.0054547 0.258 0.130 0.0305204
5 RpLP1 2.9e-06 0.3287054 0.852 0.758 0.0406019
6 CG10035 0.0e+00 0.9901292 0.688 0.444 0.0000000
6 chn 0.0e+00 0.6314384 0.583 0.284 0.0000061
6 CG13427 0.0e+00 0.7464775 0.913 0.799 0.0000095
6 Cys 0.0e+00 0.7303117 0.903 0.776 0.0000369
6 Cirl 2.0e-07 -0.4732671 0.858 0.877 0.0024057
6 Brd 2.0e-07 -0.6653823 0.795 0.862 0.0029729
6 sm 3.0e-07 0.5866523 0.688 0.444 0.0040045
6 CG1273 8.0e-07 1.2990808 0.115 0.011 0.0107627
6 GILT1 9.0e-07 1.1263516 0.205 0.060 0.0127555
6 CG4440 1.1e-06 -0.4059306 0.955 0.970 0.0151603
6 Hr4 2.4e-06 0.9591102 0.194 0.056 0.0328798
6 tinc 3.5e-06 0.8023578 0.260 0.104 0.0489679
7 CG13427 0.0e+00 0.8813352 0.962 0.898 0.0000000
7 Cys 0.0e+00 1.2305356 0.802 0.577 0.0000006
7 CG10035 0.0e+00 0.7781656 0.759 0.488 0.0000012
8 CG13427 0.0e+00 1.2698285 0.840 0.491 0.0000000
8 bnb 2.0e-07 0.9823095 0.452 0.181 0.0024240
8 link 7.0e-07 0.9551862 0.330 0.105 0.0094788
11 CG13427 3.1e-06 1.9949331 0.870 0.417 0.0427605

3.2 Psuedobulk

gene baseMean log2FoldChange lfcSE stat pvalue padj
osp 3371.66494 0.9490602 0.0980995 9.674461 0.0000000 0.0000000
CG10035 4705.93724 -0.9995277 0.1132538 -8.825558 0.0000000 0.0000000
CG13427 13676.22938 -0.8986442 0.1081934 -8.305907 0.0000000 0.0000000
CG42613 108.39429 2.4600238 0.3170232 7.759759 0.0000000 0.0000000
Idgf6 1094.75742 -0.9109371 0.1302276 -6.994964 0.0000000 0.0000000
lmd 428.88985 1.1360289 0.1812152 6.268948 0.0000000 0.0000004
phu 290.26649 1.2787897 0.2158293 5.925005 0.0000000 0.0000028
CG9628 139.73972 -1.7471450 0.3053385 -5.721994 0.0000000 0.0000083
Obp56a 48.59772 -2.7561499 0.4859791 -5.671335 0.0000000 0.0000099
tsr 2352.37698 -0.5998031 0.1063917 -5.637684 0.0000000 0.0000108
rdo 645.50635 0.8950607 0.1628721 5.495481 0.0000000 0.0000223
rad 479.28690 0.8555077 0.1595846 5.360842 0.0000001 0.0000434
CG2493 828.97511 -0.6893636 0.1304026 -5.286425 0.0000001 0.0000604
CG14111 76.19917 -1.8625821 0.3652609 -5.099319 0.0000003 0.0001410
Cyp4e2 43.71996 2.5293021 0.4969437 5.089716 0.0000004 0.0001410
CG7296 189.60361 -1.4347423 0.2813289 -5.099875 0.0000003 0.0001410
CG31221 476.54109 0.8571343 0.1751604 4.893425 0.0000010 0.0003134
CG8563 267.16907 0.9689687 0.1980493 4.892564 0.0000010 0.0003134
veil 83.75722 -1.6755630 0.3406780 -4.918318 0.0000009 0.0003134
Gas8 1210.82989 0.6476917 0.1323878 4.892384 0.0000010 0.0003134
Parp 1064.78718 -0.6093058 0.1253452 -4.861021 0.0000012 0.0003403
Nplp2 1178.26941 0.6833407 0.1406842 4.857266 0.0000012 0.0003403
nrm 2703.88656 0.5641002 0.1167212 4.832887 0.0000013 0.0003681
FASN3 85.55433 -1.6830227 0.3564950 -4.721028 0.0000023 0.0006038
cathD 2179.34841 -0.5464930 0.1160654 -4.708493 0.0000025 0.0006038
CG8654 136.42642 -1.2546929 0.2660543 -4.715929 0.0000024 0.0006038
Idgf2 378.94674 -0.9095661 0.1938646 -4.691758 0.0000027 0.0006311
CG34355 459.54911 0.8310359 0.1774481 4.683263 0.0000028 0.0006344
CG5888 773.13486 -0.6242110 0.1340532 -4.656441 0.0000032 0.0006979
Obp99a 9857.38865 0.5276080 0.1143552 4.613764 0.0000040 0.0008292
mt:ND2 761.51223 0.6559394 0.1427448 4.595190 0.0000043 0.0008774
phm 70.65118 -1.6841967 0.3713874 -4.534878 0.0000058 0.0011331
Pka-C3 453.64303 0.7460257 0.1659625 4.495147 0.0000070 0.0013253
pdm2 432.45661 -0.7816561 0.1772184 -4.410693 0.0000103 0.0019065
CG1307 547.57909 -0.7024100 0.1598180 -4.395061 0.0000111 0.0019905
LpR2 485.28088 0.7121147 0.1636429 4.351638 0.0000135 0.0022370
Cirl 6972.06739 0.4561435 0.1046606 4.358310 0.0000131 0.0022370
Gs2 156.17403 1.1316801 0.2598604 4.354954 0.0000133 0.0022370
CG5532 482.66490 -0.7179145 0.1652023 -4.345669 0.0000139 0.0022398
sm 2572.43735 -0.4422976 0.1025341 -4.313664 0.0000161 0.0024638
cato 103.25407 1.4048891 0.3253462 4.318136 0.0000157 0.0024638
RpS5b 2427.77270 -0.4635646 0.1078964 -4.296387 0.0000174 0.0026003
Catsup 510.56063 -0.6765864 0.1577423 -4.289189 0.0000179 0.0026236
Nepl3 40.29536 -2.2063523 0.5160584 -4.275393 0.0000191 0.0027280
Scgdelta 323.11217 0.8003797 0.1880711 4.255728 0.0000208 0.0029130
Hr4 534.78470 -1.7401462 0.4113390 -4.230443 0.0000233 0.0031897
CG10470 679.92442 -0.6445945 0.1526552 -4.222552 0.0000242 0.0032332
Aps 360.73414 -0.7347593 0.1749598 -4.199589 0.0000267 0.0035046
CG43736 3129.86588 -0.4398480 0.1052091 -4.180701 0.0000291 0.0037311
CG13033 52.00650 1.7763695 0.4264010 4.165959 0.0000310 0.0039010
CG4408 318.76152 -0.7968768 0.1925933 -4.137615 0.0000351 0.0041655
CG13377 100.16912 -1.4868064 0.3592023 -4.139189 0.0000349 0.0041655
Reph 103.07602 -1.2538334 0.3028410 -4.140236 0.0000347 0.0041655
Lip4 86.65617 1.5899310 0.3882417 4.095210 0.0000422 0.0049138
CG13784 1128.87575 -0.4995851 0.1223600 -4.082912 0.0000445 0.0050871
bnb 4011.41523 -0.3948169 0.0972109 -4.061446 0.0000488 0.0054788
CG13003 330.03860 0.7699454 0.1898209 4.056168 0.0000499 0.0055057
Cys 11268.01736 -0.5062853 0.1249653 -4.051407 0.0000509 0.0055220
sesB 1756.90038 -0.4360526 0.1087833 -4.008450 0.0000611 0.0065169
tey 73.23687 1.4787250 0.3735138 3.958957 0.0000753 0.0078929
KCNQ 307.39503 0.7277312 0.1843144 3.948314 0.0000787 0.0081168
CG31937 46.08345 -1.8405427 0.4678963 -3.933656 0.0000837 0.0084892
glob1 206.81591 0.9693967 0.2469193 3.925965 0.0000864 0.0086259
Hsp27 236.71608 -0.8102856 0.2079894 -3.895803 0.0000979 0.0096207
CG9953 541.82985 -0.5800716 0.1496783 -3.875456 0.0001064 0.0103003
na 150.49351 0.9667979 0.2518154 3.839311 0.0001234 0.0117442
nub 745.15093 -0.5697148 0.1485197 -3.835955 0.0001251 0.0117442
pre-lola-G 2112.42016 -0.4074575 0.1070246 -3.807138 0.0001406 0.0130061
RYa-R 416.80119 0.7086618 0.1863461 3.802933 0.0001430 0.0130372
ImpE2 148.51406 -0.9699839 0.2554827 -3.796671 0.0001467 0.0131798
EMC4 803.30447 -0.5446533 0.1447556 -3.762572 0.0001682 0.0149013
GILT2 317.09899 -0.6789910 0.1817843 -3.735146 0.0001876 0.0163921
Ilp4 1438.63908 -0.4529996 0.1215218 -3.727721 0.0001932 0.0166512
moon 40.37415 -1.8961978 0.5114145 -3.707751 0.0002091 0.0177770
CG13082 25.08776 -2.4520137 0.6621053 -3.703359 0.0002128 0.0178466
Brd 5473.72531 0.4365291 0.1182877 3.690401 0.0002239 0.0185337
awd 1521.82851 -0.4400794 0.1197062 -3.676328 0.0002366 0.0193318
CREG 325.16037 -0.6629255 0.1810676 -3.661204 0.0002510 0.0202468
CG4572 277.80997 -0.7081529 0.1945458 -3.640032 0.0002726 0.0217082
RpL19 7319.44753 -0.3819032 0.1055493 -3.618247 0.0002966 0.0233243
Alk 452.33980 -0.6323746 0.1758174 -3.596768 0.0003222 0.0250239
CG15083 63.82051 -1.4712384 0.4110384 -3.579321 0.0003445 0.0261105
CG7294 74.29311 -1.3653152 0.3811404 -3.582184 0.0003407 0.0261105
Myo81F 856.12185 0.4800521 0.1349855 3.556323 0.0003761 0.0266179
CG6845 209.27556 0.7910928 0.2226520 3.553045 0.0003808 0.0266179
Mal-A5 91.11093 -1.1825593 0.3315319 -3.566955 0.0003612 0.0266179
CG6398 1480.22336 -0.4102890 0.1151001 -3.564627 0.0003644 0.0266179
spri 842.77155 -0.4665542 0.1311765 -3.556691 0.0003756 0.0266179
Wnt6 564.55351 0.6128538 0.1724620 3.553558 0.0003801 0.0266179
dpr3 281.10956 0.7203748 0.2021935 3.562800 0.0003669 0.0266179
alpha4GT2 381.38354 0.6602753 0.1868419 3.533872 0.0004095 0.0283109
PIG-T 631.51665 0.5472941 0.1551630 3.527221 0.0004199 0.0287161
Rtnl1 1526.66297 -0.4536499 0.1287622 -3.523160 0.0004264 0.0288461
RpL36A 6155.47223 -0.3421061 0.0972052 -3.519420 0.0004325 0.0289447
CG17752 203.89616 0.7939785 0.2280338 3.481845 0.0004980 0.0329762
RpLP1 10800.45982 -0.3267212 0.0941364 -3.470720 0.0005191 0.0340149
CG32850 1892.99521 0.4241002 0.1223178 3.467200 0.0005259 0.0341082
Scp2 52.32488 1.4231960 0.4171429 3.411771 0.0006454 0.0414322
RpL9 5652.95552 -0.3504997 0.1031448 -3.398133 0.0006785 0.0431140
CG42673 49.12974 -1.8069396 0.5354511 -3.374612 0.0007392 0.0460892
Spn 1248.54191 -0.4269382 0.1265252 -3.374333 0.0007399 0.0460892
RpLP2 8725.90780 -0.3327300 0.0988199 -3.367033 0.0007598 0.0468628
Shrm 842.50444 -0.4365074 0.1298457 -3.361738 0.0007745 0.0471059
CG44325 67.37006 -1.2647049 0.3766693 -3.357600 0.0007862 0.0471059
vir-1 438.02843 0.5536773 0.1648364 3.358950 0.0007824 0.0471059
CG11905 42.73644 -1.5832051 0.4727176 -3.349156 0.0008106 0.0481072
gene p_val avg_log2FC pct.1 pct.2 p_val_adj
CG13427 0.0e+00 0.8502533 0.844 0.618 0.0000000
CG10035 0.0e+00 0.8504618 0.565 0.343 0.0000000
Cys 0.0e+00 0.5029477 0.832 0.700 0.0000000
Cirl 0.0e+00 -0.4566326 0.795 0.819 0.0000000
Idgf6 0.0e+00 0.5625596 0.341 0.193 0.0000000
Hr4 0.0e+00 1.1074823 0.153 0.056 0.0000000
Bacc 0.0e+00 -0.2774134 0.952 0.961 0.0000000
bnb 0.0e+00 0.3146393 0.605 0.442 0.0000000
tsr 0.0e+00 0.3827536 0.537 0.394 0.0000000
cathD 0.0e+00 0.3885886 0.514 0.369 0.0000000
GILT1 0.0e+00 0.7972865 0.114 0.043 0.0000000
RpS5b 0.0e+00 0.3309076 0.556 0.409 0.0000000
CG2493 0.0e+00 0.3936189 0.286 0.170 0.0000000
mt:ND6 0.0e+00 -0.5583755 0.383 0.457 0.0000000
CG10470 0.0e+00 0.4214178 0.243 0.143 0.0000000
CG5888 0.0e+00 0.3654088 0.261 0.156 0.0000000
Parp 0.0e+00 0.4388716 0.303 0.197 0.0000000
CG1307 0.0e+00 0.3687818 0.204 0.115 0.0000000
sm 0.0e+00 0.3623828 0.540 0.411 0.0000000
CG13784 0.0e+00 0.3349819 0.331 0.219 0.0000000
Catsup 0.0e+00 0.3366220 0.193 0.110 0.0000000
CG5532 0.0e+00 0.3060111 0.179 0.099 0.0000000
Idgf2 0.0e+00 0.3449717 0.147 0.075 0.0000000
EMC4 0.0e+00 0.3678109 0.267 0.171 0.0000000
CG43736 0.0e+00 0.2905507 0.567 0.448 0.0000000
Rtnl1 0.0e+00 0.3206116 0.414 0.303 0.0000000
sesB 0.0e+00 0.2849543 0.449 0.330 0.0000000
CG9953 0.0e+00 0.3274159 0.199 0.120 0.0000000
GILT2 0.0e+00 0.3188759 0.118 0.059 0.0000000
CG4408 0.0e+00 0.2902988 0.129 0.067 0.0000000
CG10077 0.0e+00 0.2576343 0.433 0.323 0.0000000
chn 0.0e+00 0.2604319 0.539 0.425 0.0000000
CG15012 0.0e+00 0.2627413 0.322 0.224 0.0000000
Aps 0.0e+00 0.3315057 0.137 0.076 0.0000000
Hsc70-3 0.0e+00 0.2924539 0.354 0.261 0.0000000
pre-lola-G 0.0e+00 0.2670185 0.446 0.340 0.0000000
osp 0.0e+00 -0.9535577 0.334 0.388 0.0000000
CG13917 0.0e+00 0.2731839 0.262 0.178 0.0000000
Dad1 0.0e+00 0.2547453 0.280 0.192 0.0000000
CG14715 0.0e+00 0.2674884 0.251 0.168 0.0000000
CG6398 0.0e+00 0.2782824 0.378 0.280 0.0000000
Ten-m 0.0e+00 0.2557801 0.517 0.410 0.0000000
smash 0.0e+00 0.2544797 0.289 0.203 0.0000000
pdm2 0.0e+00 0.3183820 0.123 0.069 0.0000000
CG4572 0.0e+00 0.2648611 0.114 0.063 0.0000000
CG3625 0.0e+00 0.2712677 0.300 0.216 0.0000000
CREG 0.0e+00 0.2766333 0.112 0.063 0.0000000
cl 0.0e+00 0.2607151 0.116 0.067 0.0000000
mt:ND2 0.0e+00 -0.5140193 0.183 0.237 0.0000000
Shrm 0.0e+00 0.2579052 0.237 0.167 0.0000000
Ilp4 0.0e+00 0.2640017 0.309 0.235 0.0000000
tinc 0.0e+00 0.2562071 0.259 0.191 0.0000000
rdo 0.0e+00 -0.6856741 0.118 0.165 0.0000000
Brd 0.0e+00 -0.3896540 0.599 0.601 0.0000000
nub 0.0e+00 0.2710202 0.148 0.100 0.0000000
CG31221 0.0e+00 -0.5862591 0.096 0.135 0.0000000
CG34355 0.0e+00 -0.5540961 0.097 0.134 0.0000001
wbl 0.0e+00 0.3079175 0.116 0.078 0.0000001
mt:ND3 0.0e+00 -0.4067587 0.286 0.317 0.0000005
Pka-C3 0.0e+00 -0.5198433 0.091 0.125 0.0000008
Gas8 0.0e+00 -0.5978695 0.183 0.220 0.0000027
Scgdelta 0.0e+00 -0.5163235 0.071 0.100 0.0000122
LpR2 0.0e+00 -0.5391405 0.103 0.132 0.0004712
CG6040 1.0e-07 -0.5234392 0.083 0.110 0.0009684
alpha4GT2 1.0e-07 -0.4422851 0.086 0.113 0.0012809
RYa-R 1.0e-07 -0.4824968 0.093 0.119 0.0018997
Obp99a 1.0e-07 -0.3776808 0.514 0.526 0.0019892
bbg 2.0e-07 0.2617571 0.107 0.079 0.0029730
klu 5.0e-07 0.2572902 0.227 0.188 0.0068642
bs 8.0e-07 -0.3737178 0.084 0.108 0.0108309
mbl 1.4e-06 0.2827754 0.261 0.224 0.0193116

DESeq2 finds 106 differentially expressed genes and Seurat finds 71 DEGs with 49 genes overlapping between the two methods.

List of overlapping genes separated by a semicolon: osp; CG10035; CG13427; Idgf6; tsr; rdo; CG2493; CG31221; Gas8; Parp; cathD; Idgf2; CG34355; CG5888; Obp99a; mt:ND2; Pka-C3; pdm2; CG1307; LpR2; Cirl; CG5532; sm; RpS5b; Catsup; Scgdelta; Hr4; CG10470; Aps; CG43736; CG4408; CG13784; bnb; Cys; sesB; CG9953; nub; pre-lola-G; RYa-R; EMC4; GILT2; Ilp4; Brd; CREG; CG4572; CG6398; alpha4GT2; Rtnl1; Shrm

Session Info

R version 4.2.3 (2023-03-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] shiny_1.7.4      kableExtra_1.3.4 lubridate_1.9.2  forcats_1.0.0   
 [5] stringr_1.5.0    dplyr_1.1.2      purrr_1.0.1      readr_2.1.4     
 [9] tidyr_1.3.0      tibble_3.2.1     ggplot2_3.4.2    tidyverse_2.0.0 

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.0  xfun_0.39         colorspace_2.1-0  vctrs_0.6.2      
 [5] generics_0.1.3    htmltools_0.5.5   viridisLite_0.4.2 yaml_2.3.7       
 [9] utf8_1.2.3        rlang_1.1.1       pillar_1.9.0      later_1.3.1      
[13] glue_1.6.2        withr_2.5.0       lifecycle_1.0.3   munsell_0.5.0    
[17] gtable_0.3.3      rvest_1.0.3       htmlwidgets_1.6.2 evaluate_0.21    
[21] labeling_0.4.2    knitr_1.43        tzdb_0.4.0        fastmap_1.1.1    
[25] httpuv_1.6.11     fansi_1.0.4       Rcpp_1.0.10       xtable_1.8-4     
[29] scales_1.2.1      promises_1.2.0.1  webshot_0.5.4     jsonlite_1.8.5   
[33] farver_2.1.1      mime_0.12         systemfonts_1.0.4 png_0.1-8        
[37] hms_1.1.3         digest_0.6.31     stringi_1.7.12    cowplot_1.1.1    
[41] rprojroot_2.0.3   grid_4.2.3        here_1.0.1        cli_3.6.1        
[45] tools_4.2.3       magrittr_2.0.3    pkgconfig_2.0.3   ellipsis_0.3.2   
[49] xml2_1.3.4        timechange_0.2.0  rmarkdown_2.22    svglite_2.1.1    
[53] httr_1.4.6        rstudioapi_0.14   R6_2.5.1          compiler_4.2.3